#StackBounty: #performance #python-3.x #iterator #yield #next Same iterator algorithm using function and yield two times faster compare…

Bounty: 50

In the below provided Python script code exactly the same algorithm for creating unique combinations from a non-unique list of elements was implemented using a Python function with yield and using a class with __next__. The code is ready to run after copy/paste, so you can see it for yourself what I am speaking about.

The same phenomenon observed for pure Python code propagates into C code of a Python extension module created out of the script code by Cython, so it is not limited to Python level code because it doesn’t vanish at the C code level.

The question is:

Where does the huge difference in speed of execution come from?
Is there anything that can be done to get both code variants to run at comparable speed? Is there something went wrong with the class/next implementation compared to the function/yield variant? Both are to my knowledge exactly the same code …

Here the code (tweaking the number in the highlighted line changes the level of uniqueness of elements in the list the combinations are generated from what has a huge impact on the running time):

def uniqCmboYieldIter(lstItems, lenCmbo):
    dctCounter = {}
    lenLstItems = len(lstItems)
    for idx in range(lenLstItems):
        item = lstItems[idx]
        if item in dctCounter.keys(): 
            dctCounter[item] += 1
        else: 
            dctCounter[item]  = 1
        #:if
    #:for     
    lstUniqs   = sorted(dctCounter.keys())
    lstCntRpts = [dctCounter[item] for item in lstUniqs]
    lenUniqs   = len(lstUniqs)
    cmboAsIdxUniqs = [None] * lenCmbo
    multiplicities = [0] * lenUniqs
    idxIntoCmbo, idxIntoUniqs = 0, 0

    while idxIntoCmbo != lenCmbo and idxIntoUniqs != lenUniqs:
        count = min(lstCntRpts[idxIntoUniqs], lenCmbo-idxIntoCmbo)
        cmboAsIdxUniqs[idxIntoCmbo : idxIntoCmbo + count] = [idxIntoUniqs] * count
        multiplicities[idxIntoUniqs] = count
        idxIntoCmbo  += count
        idxIntoUniqs += 1

    if idxIntoCmbo != lenCmbo:
        return

    while True:
        yield tuple(lstUniqs[idxUniqs] for idxUniqs in cmboAsIdxUniqs)

        for idxIntoCmbo in reversed(range(lenCmbo)):
            x = cmboAsIdxUniqs[idxIntoCmbo]
            y = x + 1

            if y < lenUniqs and multiplicities[y] < lstCntRpts[y]:
                break
        else:
            return

        for idxIntoCmbo in range(idxIntoCmbo, lenCmbo):
            x = cmboAsIdxUniqs[idxIntoCmbo]
            cmboAsIdxUniqs[idxIntoCmbo] = y
            multiplicities[x] -= 1
            multiplicities[y] += 1
            # print("# multiplicities:", multiplicities)


            while y != lenUniqs and multiplicities[y] == lstCntRpts[y]:
                y += 1

            if y == lenUniqs:
                break


class uniqCmboClassIter:
    # ----------------------------------------------------------------------------------------------
    def __iter__(self):
       return self

    # ----------------------------------------------------------------------------------------------
    def __init__(self, lstItems, lenCmbo):
        dctCounter = {}
        lenLstItems = len(lstItems)
        for idx in range(lenLstItems):
            item = lstItems[idx]
            if item in dctCounter.keys(): 
                dctCounter[item] += 1
            else: 
                dctCounter[item]  = 1
            #:if
        #:for     

        self.lstUniqs   = sorted(dctCounter.keys())
        self.lenUniqs   = len(self.lstUniqs)
        self.lstCntRpts = [dctCounter[item] for item in self.lstUniqs]

        self.lenCmbo        = lenCmbo
        self.cmboAsIdxUniqs = [None] * lenCmbo
        self.multiplicities = [0] * self.lenUniqs
        self.idxIntoCmbo, self.idxIntoUniqs = 0, 0

        while self.idxIntoCmbo != self.lenCmbo and self.idxIntoUniqs != self.lenUniqs:
            count = min(self.lstCntRpts[self.idxIntoUniqs], self.lenCmbo-self.idxIntoCmbo)
            self.cmboAsIdxUniqs[self.idxIntoCmbo : self.idxIntoCmbo + count] = [self.idxIntoUniqs] * count
            self.multiplicities[self.idxIntoUniqs] = count
            self.idxIntoCmbo  += count
            self.idxIntoUniqs += 1
            # print("self.multiplicities:", self.multiplicities)
            # print("self.cmboAsIdxUniqs:", self.cmboAsIdxUniqs)

        if self.idxIntoCmbo != self.lenCmbo:
            return

        self.stopIteration = False
        self.x = None
        self.y = None

        return

    # ----------------------------------------------------------------------------------------------
    def __next__(self):

        if self.stopIteration is True:
            raise StopIteration
            return

        nextCmbo = tuple(self.lstUniqs[idxUniqs] for idxUniqs in self.cmboAsIdxUniqs)

        for self.idxIntoCmbo in reversed(range(self.lenCmbo)):
            self.x = self.cmboAsIdxUniqs[self.idxIntoCmbo]
            self.y = self.x + 1

            if self.y < self.lenUniqs and self.multiplicities[self.y] < self.lstCntRpts[self.y]:
                break
        else:
            self.stopIteration = True
            return nextCmbo

        for self.idxIntoCmbo in range(self.idxIntoCmbo, self.lenCmbo):
            self.x = self.cmboAsIdxUniqs[self.idxIntoCmbo]
            self.cmboAsIdxUniqs[self.idxIntoCmbo] = self.y
            self.multiplicities[self.x] -= 1
            self.multiplicities[self.y] += 1
            # print("# multiplicities:", multiplicities)


            while self.y != self.lenUniqs and self.multiplicities[self.y] == self.lstCntRpts[self.y]:
                self.y += 1

            if self.y == self.lenUniqs:
                break

        return nextCmbo

# ============================================================================================================================================
lstSize   = 48 # 48
uniqLevel =  12 # (7 ~60% unique) higher level => more unique items in the generated list 
aList = []
from random import randint
for _ in range(lstSize):
    aList.append( ( randint(1,uniqLevel), randint(1,uniqLevel) ) )
lenCmbo = 6
percUnique = 100.0 - 100.0*(lstSize-len(set(aList)))/lstSize
print("========================  lenCmbo:", lenCmbo, 
      "   sizeOfList:", len(aList), 
      "   noOfUniqueInList", len(set(aList)), 
      "   percUnique",  int(percUnique) ) 

import time
from itertools import combinations
# itertools.combinations
# ---
# def   uniqCmboYieldIter(lstItems, lenCmbo):
# class uniqCmboClassIter: def __init__(self, lstItems, lenCmbo):
# ---
start_time = time.time()
print("Combos:%9i"%len(list(combinations(aList, lenCmbo))), " ", end='')
duration = time.time() - start_time
print("print(len(list(     combinations(aList, lenCmbo)))):",  "{:9.5f}".format(duration), "seconds.")

start_time = time.time()
print("Combos:%9i"%len(list(uniqCmboYieldIter(aList, lenCmbo))), " ", end='')
duration = time.time() - start_time
print("print(len(list(uniqCmboYieldIter(aList, lenCmbo)))):",  "{:9.5f}".format(duration), "seconds.")

start_time = time.time()
print("Combos:%9i"%len(list(uniqCmboClassIter(aList, lenCmbo))), " ", end='')
duration = time.time() - start_time
print("print(len(list(uniqCmboClassIter(aList, lenCmbo)))):", "{:9.5f}".format(duration), "seconds.")

and the timings on my box:

>python3.6 -u "nonRecursiveUniqueCombos_Cg.py"
========================  lenCmbo: 6    sizeOfList: 48    noOfUniqueInList 32    percUnique 66
Combos: 12271512  print(len(list(     combinations(aList, lenCmbo)))):   2.04635 seconds.
Combos:  1296058  print(len(list(uniqCmboYieldIter(aList, lenCmbo)))):   3.25447 seconds.
Combos:  1296058  print(len(list(uniqCmboClassIter(aList, lenCmbo)))):   5.97371 seconds.
>Exit code: 0
  [2017-05-02_03:23]  207474 <-Chrs,Keys-> 1277194 OnSave(): '/home/claudio/CgMint18/_Cg.DIR/ClaudioOnline/at-stackoverflow/bySubject/uniqueCombinations/nonRecursiveUniqueCombos_Cg.py'
>python3.6 -u "nonRecursiveUniqueCombos_Cg.py"
========================  lenCmbo: 6    sizeOfList: 48    noOfUniqueInList 22    percUnique 45
Combos: 12271512  print(len(list(     combinations(aList, lenCmbo)))):   2.05199 seconds.
Combos:   191072  print(len(list(uniqCmboYieldIter(aList, lenCmbo)))):   0.47343 seconds.
Combos:   191072  print(len(list(uniqCmboClassIter(aList, lenCmbo)))):   0.89860 seconds.
>Exit code: 0
  [2017-05-02_03:23]  207476 <-Chrs,Keys-> 1277202 OnSave(): '/home/claudio/CgMint18/_Cg.DIR/ClaudioOnline/at-stackoverflow/bySubject/uniqueCombinations/nonRecursiveUniqueCombos_Cg.py'
>python3.6 -u "nonRecursiveUniqueCombos_Cg.py"
========================  lenCmbo: 6    sizeOfList: 48    noOfUniqueInList 43    percUnique 89
Combos: 12271512  print(len(list(     combinations(aList, lenCmbo)))):   2.17285 seconds.
Combos:  6560701  print(len(list(uniqCmboYieldIter(aList, lenCmbo)))):  16.72573 seconds.
Combos:  6560701  print(len(list(uniqCmboClassIter(aList, lenCmbo)))):  31.17714 seconds.
>Exit code: 0


Get this bounty!!!