#StackBounty: #menus #navigation #performance #cache Nav and logo loading each time causing menu to move JointsWP – Foundation 6

Bounty: 50

Hi, I was wondering if anyone could help.

I’m creating a site using the JointsWP Foundation 6 theme and have created a new fixed side menu which includes the logo and social links. My problem is that every time a user clicks on the menu it reloads, causing a shift. Is there a way of stopping this? Is it a page-load issue, or have I gone about it the wrong way? I tried adding a caching plugin but it hasn’t seemed to help. Any suggestions appreciated.

Here are examples of my code:

<body <?php body_class(); ?>>

    

and the page.php

<?php get_header(); ?>
    
<div id="content">
    <div id="inner-content" class="row">
        <main id="main" class="large-9 medium-9 columns contentSection" role="main">
            <?php if (have_posts()) : while (have_posts()) : the_post(); ?>
                <?php get_template_part( 'parts/loop', 'page' ); ?>
            <?php endwhile; endif; ?>
        </main> <!-- end #main -->
    </div> <!-- end #inner-content -->
</div> <!-- end #content -->

Edit:

I have added two test pages so that you can see the issue – biggreenspace.com/test-page-1 – and from there you will be able to navigate to test page 2 (the other menu items will take you to the maintenance screen). This primarily happens in Chrome and Firefox – not in IE/Edge.


Get this bounty!!!

#StackBounty: #python #performance #regex #natural-language-proc #cython Using lots of regex substitutions to tokenize text

Bounty: 50

I authored a piece of code that was merged into the nltk codebase. It is full of regex substitutions:

import re
from six import text_type

from nltk.tokenize.api import TokenizerI

class ToktokTokenizer(TokenizerI):
    """
    This is a Python port of the tok-tok.pl from
    https://github.com/jonsafari/tok-tok/blob/master/tok-tok.pl

    >>> toktok = ToktokTokenizer()
    >>> text = u'Is 9.5 or 525,600 my favorite number?'
    >>> print (toktok.tokenize(text, return_str=True))
    Is 9.5 or 525,600 my favorite number ?
    >>> text = u'The https://github.com/jonsafari/tok-tok/blob/master/tok-tok.pl is a website with/and/or slashes and sort of weird : things'
    >>> print (toktok.tokenize(text, return_str=True))
    The https://github.com/jonsafari/tok-tok/blob/master/tok-tok.pl is a website with/and/or slashes and sort of weird : things
    >>> text = u'\xa1This, is a sentence with weird\xbb symbols\u2026 appearing everywhere\xbf'
    >>> expected = u'\xa1 This , is a sentence with weird \xbb symbols \u2026 appearing everywhere \xbf'
    >>> assert toktok.tokenize(text, return_str=True) == expected
    >>> toktok.tokenize(text) == [u'\xa1', u'This', u',', u'is', u'a', u'sentence', u'with', u'weird', u'\xbb', u'symbols', u'\u2026', u'appearing', u'everywhere', u'\xbf']
    True
    """
    # Replace non-breaking spaces with normal spaces.
    NON_BREAKING = re.compile(u"\u00A0"), " "

    # Pad some funky punctuation.
    FUNKY_PUNCT_1 = re.compile(u'([،;؛¿!"\])}»›”؟¡%٪°±©®।॥…])'), r" \1 "
    # Pad more funky punctuation.
    FUNKY_PUNCT_2 = re.compile(u'([({\[“‘„‚«‹「『])'), r" \1 "
    # Pad En dash and em dash
    EN_EM_DASHES = re.compile(u'([–—])'), r" \1 "

    # Replace problematic character with numeric character reference.
    AMPERCENT = re.compile('& '), '&amp; '
    TAB = re.compile('\t'), ' &#9; '
    PIPE = re.compile(r'\|'), ' &#124; '

    # Pad numbers with commas to keep them from further tokenization. 
    COMMA_IN_NUM = re.compile(r'(?<!,)([,،])(?![,\d])'), r' \1 '

    # Just pad problematic (often neurotic) hyphen/single quote, etc.
    PROB_SINGLE_QUOTES = re.compile(r"(['’`])"), r' \1 '
    # Group ` ` stupid quotes ' ' into a single token.
    STUPID_QUOTES_1 = re.compile(r" ` ` "), r" `` "
    STUPID_QUOTES_2 = re.compile(r" ' ' "), r" '' "

    # Don't tokenize period unless it ends the line and that it isn't 
    # preceded by another period, e.g.  
    # "something ..." -> "something ..." 
    # "something." -> "something ." 
    FINAL_PERIOD_1 = re.compile(r"(?<!\.)\.$"), r" ."
    # Don't tokenize period unless it ends the line eg. 
    # " ... stuff." ->  "... stuff ."
    FINAL_PERIOD_2 = re.compile(r"""(?<!\.)\.\s*(["'’»›”]) *$"""), r" . \1"

    # Treat continuous commas as fake German,Czech, etc.: „
    MULTI_COMMAS = re.compile(r'(,{2,})'), r' \1 '
    # Treat continuous dashes as fake en-dash, etc.
    MULTI_DASHES = re.compile(r'(-{2,})'), r' \1 '
    # Treat multiple periods as a thing (eg. ellipsis)
    MULTI_DOTS = re.compile(r'(\.{2,})'), r' \1 '

    # This is the \p{Open_Punctuation} from Perl's perluniprops
    # see http://perldoc.perl.org/perluniprops.html
    OPEN_PUNCT = text_type(u'([{\u0f3a\u0f3c\u169b\u201a\u201e\u2045\u207d'
                            u'\u208d\u2329\u2768\u276a\u276c\u276e\u2770\u2772'
                            u'\u2774\u27c5\u27e6\u27e8\u27ea\u27ec\u27ee\u2983'
                            u'\u2985\u2987\u2989\u298b\u298d\u298f\u2991\u2993'
                            u'\u2995\u2997\u29d8\u29da\u29fc\u2e22\u2e24\u2e26'
                            u'\u2e28\u3008\u300a\u300c\u300e\u3010\u3014\u3016'
                            u'\u3018\u301a\u301d\ufd3e\ufe17\ufe35\ufe37\ufe39'
                            u'\ufe3b\ufe3d\ufe3f\ufe41\ufe43\ufe47\ufe59\ufe5b'
                            u'\ufe5d\uff08\uff3b\uff5b\uff5f\uff62')
    # This is the \p{Close_Punctuation} from Perl's perluniprops
    CLOSE_PUNCT = text_type(u')]}\u0f3b\u0f3d\u169c\u2046\u207e\u208e\u232a'
                            u'\u2769\u276b\u276d\u276f\u2771\u2773\u2775\u27c6'
                            u'\u27e7\u27e9\u27eb\u27ed\u27ef\u2984\u2986\u2988'
                            u'\u298a\u298c\u298e\u2990\u2992\u2994\u2996\u2998'
                            u'\u29d9\u29db\u29fd\u2e23\u2e25\u2e27\u2e29\u3009'
                            u'\u300b\u300d\u300f\u3011\u3015\u3017\u3019\u301b'
                            u'\u301e\u301f\ufd3f\ufe18\ufe36\ufe38\ufe3a\ufe3c'
                            u'\ufe3e\ufe40\ufe42\ufe44\ufe48\ufe5a\ufe5c\ufe5e'
                            u'\uff09\uff3d\uff5d\uff60\uff63')
    # This is the \p{Currency_Symbol} from Perl's perluniprops
    CURRENCY_SYM = text_type(u'$\xa2\xa3\xa4\xa5\u058f\u060b\u09f2\u09f3\u09fb'
                             u'\u0af1\u0bf9\u0e3f\u17db\u20a0\u20a1\u20a2\u20a3'
                             u'\u20a4\u20a5\u20a6\u20a7\u20a8\u20a9\u20aa\u20ab'
                             u'\u20ac\u20ad\u20ae\u20af\u20b0\u20b1\u20b2\u20b3'
                             u'\u20b4\u20b5\u20b6\u20b7\u20b8\u20b9\u20ba\ua838'
                             u'\ufdfc\ufe69\uff04\uffe0\uffe1\uffe5\uffe6')

    # Pad spaces after opening punctuations.
    OPEN_PUNCT_RE = re.compile(u'([{}])'.format(OPEN_PUNCT)), r'\1 '
    # Pad spaces before closing punctuations.
    CLOSE_PUNCT_RE = re.compile(u'([{}])'.format(CLOSE_PUNCT)), r'\1 '
    # Pad spaces after currency symbols.
    CURRENCY_SYM_RE = re.compile(u'([{}])'.format(CURRENCY_SYM)), r'\1 '

    # Use for tokenizing URL-unfriendly characters: [:/?#]
    URL_FOE_1 = re.compile(r':(?!//)'), r' : '  # in perl s{:(?!//)}{ : }g;
    URL_FOE_2 = re.compile(r'\?(?!\S)'), r' ? '  # in perl s{\?(?!\S)}{ ? }g;
    # in perl: m{://} or m{\S+\.\S+/\S+} or s{/}{ / }g;
    URL_FOE_3 = re.compile(r'(:\/\/)[\S+\.\S+\/\S+][\/]'), ' / '
    URL_FOE_4 = re.compile(r' /'), r' / '  # s{ /}{ / }g;

    # Left/Right strip, i.e. remove heading/trailing spaces.
    # These strip regexes should NOT be used,
    # instead use str.lstrip(), str.rstrip() or str.strip() 
    # (They are kept for reference purposes to the original toktok.pl code)  
    LSTRIP = re.compile(r'^ +'), ''
    RSTRIP = re.compile(r'\s+$'), '\n'
    # Merge multiple spaces.
    ONE_SPACE = re.compile(r' {2,}'), ' '

    TOKTOK_REGEXES = [NON_BREAKING, FUNKY_PUNCT_1, 
                      URL_FOE_1, URL_FOE_2, URL_FOE_3, URL_FOE_4,
                      AMPERCENT, TAB, PIPE,
                      OPEN_PUNCT_RE, CLOSE_PUNCT_RE, 
                      MULTI_COMMAS, COMMA_IN_NUM, FINAL_PERIOD_2,
                      PROB_SINGLE_QUOTES, STUPID_QUOTES_1, STUPID_QUOTES_2,
                      CURRENCY_SYM_RE, EN_EM_DASHES, MULTI_DASHES, MULTI_DOTS,
                      FINAL_PERIOD_1, FINAL_PERIOD_2, ONE_SPACE]

    def tokenize(self, text, return_str=False):
        text = text_type(text) # Converts input string into unicode.
        for regexp, substitution in self.TOKTOK_REGEXES:
            text = regexp.sub(substitution, text)
        # Finally, strip leading and trailing spaces
        # and convert the output string into unicode.
        text = text_type(text.strip())
        return text if return_str else text.split()

Is there a way to make the substitutions faster? E.g.:

  • Combine the chain of regexes into one super regex (see the sketch after this list).
  • Combine some of the regexes
  • Coding it in Cython (but Cython regexes are slow, no?)
  • Running the regex substitution in Julia and wrapping Julia code in Python
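For the "one super regex" idea, here is a minimal sketch (my own illustration, not the tokenizer's actual rule set): every rule that merely pads a single character with spaces can be folded into one character class and applied in a single pass, leaving only the context-sensitive rules (final periods, commas in numbers, URLs) as separate substitutions.

import re

# Subset of the padded characters, chosen only for illustration.
PAD_CHARS = u'،;؛¿!")]}»›”؟¡%٪°±©®।॥…–—'
PAD_RE = re.compile(u'([{}])'.format(re.escape(PAD_CHARS)))

def pad_punct(text):
    # One sub() call instead of one call per rule; \1 keeps the matched character.
    return PAD_RE.sub(r' \1 ', text)

print(pad_punct(u'wow! really؟'))  # extra spaces are later collapsed by ONE_SPACE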

The use case for the tokenize() function usually takes a single input, but if the same function is called 1,000,000,000 times it's rather slow, and the GIL means a single Python process works through the sentences one at a time on one core.
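Because the work is CPU-bound, one obvious workaround for the GIL is to fan the sentences out over worker processes. A minimal sketch (my own illustration; it assumes ToktokTokenizer is importable from nltk, and the chunk size of 10000 is an arbitrary illustrative value):

from multiprocessing import Pool

from nltk.tokenize import ToktokTokenizer

toktok = ToktokTokenizer()

def tokenize_one(sentence):
    return toktok.tokenize(sentence)

def tokenize_all(sentences, processes=4):
    # Without a generous chunksize, the per-sentence IPC round trip can cost
    # more than the tokenization itself.
    with Pool(processes=processes) as pool:
        return pool.map(tokenize_one, sentences, chunksize=10000)

if __name__ == '__main__':
    print(tokenize_all([u'Is 9.5 or 525,600 my favorite number?'] * 4, processes=2))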

The aim of the question is to ask for ways to speed up Python code that is made up of regex substitutions, especially when the tokenize() function is run 1,000,000,000+ times.

If Cython/Julia or any other faster language plus a wrapper is suggested, it would be good to include a one-regex example of how the regex is written in Cython/Julia/other languages and a suggestion of what the wrapper would look like.


Get this bounty!!!

#StackBounty: #ssd #performance SanDisk SSD Plus: Half the performance on Linux than on Windows?

Bounty: 50

I have two SSDs in my laptop:

  • Crucial MX300 725GB –> /dev/sda
  • SanDisk SSD Plus 240GB –> /dev/sdb

Their read performance on Linux and Windows looks like this:

Crucial MX300 –> same on both OSs

sudo hdparm -tT /dev/sda # Crucial
Timing cached reads:   13700 MB in  2.00 seconds = 6854.30 MB/sec
Timing buffered disk reads: 1440 MB in  3.00 seconds = 479.58 MB/sec

(screenshot: Crucial MX300 725GB benchmark)

SanDisk Plus –> way faster on Windows!

sudo hdparm -tT /dev/sdb # SanDisk
Timing cached reads:   7668 MB in  2.00 seconds = 3834.92 MB/sec
Timing buffered disk reads: 798 MB in  3.00 seconds = 265.78 MB/sec # TOO LOW !!

(screenshot: SanDisk SSD Plus benchmark)

The sequential read performance of the SanDisk on Linux is about half of its performance on Windows!

My question, of course, is: why, and can it be fixed? Is this due to the SanDisk SSD Plus being handled as a SCSI drive?

From syslog:

~$ grep SDSSD /var/log/syslog
systemd[1]: Found device SanDisk_SDSSDA240G
kernel: [    2.152138] ata2.00: ATA-9: SanDisk SDSSDA240G, Z32070RL, max UDMA/133
kernel: [    2.174689] scsi 1:0:0:0: Direct-Access     ATA      SanDisk SDSSDA24 70RL PQ: 0 ANSI: 5
smartd[1035]: Device: /dev/sdb [SAT], SanDisk SDSSDA240G, S/N:162783441004, WWN:5-001b44-4a404e4f0, FW:Z32070RL, 240 GB
smartd[1035]: Device: /dev/sdb [SAT], state read from /var/lib/smartmontools/smartd.SanDisk_SDSSDA240G-162783441004.ata.state
smartd[1035]: Device: /dev/sdb [SAT], state written to /var/lib/smartmontools/smartd.SanDisk_SDSSDA240G-162783441004.ata.state

Compared to the Crucial MX300, which on Linux has almost the same performance as on Windows:

~$ grep MX300 /var/log/syslog
systemd[1]: Found device Crucial_CT750MX300SSD1
kernel: [    1.775520] ata1.00: ATA-10: Crucial_CT750MX300SSD1,  M0CR050, max UDMA/133
smartd[1035]: Device: /dev/sda [SAT], Crucial_CT750MX300SSD1, S/N:16251486AC40, WWN:5-00a075-11486ac40, FW:M0CR050, 750 GB
smartd[1035]: Device: /dev/sda [SAT], state read from /var/lib/smartmontools/smartd.Crucial_CT750MX300SSD1-16251486AC40.ata.state
smartd[1035]: Device: /dev/sda [SAT], state written to /var/lib/smartmontools/smartd.Crucial_CT750MX300SSD1-16251486AC40.ata.state

Any help is very welcome!

Edit:

The difference that hdparm shows on Linux is very real. I created two identical directories, one on each of the two drives, each containing about 25 GB of files (36,395 files), and ran the exact same hashdeep checksum-creation script on both directories (the script just creates an MD5 checksum for every file in the test dirs and stores all the checksums in one single file). These are the results:

test-sandisk# time create-file-integrity-md5sums.sh .
real    1m49.000s
user    1m24.868s
sys 0m15.808s

test-mx300# time create-file-integrity-md5sums.sh .
real    0m54.180s
user    1m4.628s
sys 0m11.640s

Same test with a single 7 GB file:

test-sandisk# time create-file-integrity-md5sums.sh .
real    0m26.986s
user    0m19.168s
sys 0m3.232s


test-mx300# time create-file-integrity-md5sums.sh .
real    0m17.285s
user    0m16.248s
sys 0m1.368s
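To take the MD5 hashing cost out of the comparison entirely, a simpler check is to time a raw buffered read of the same large file from each drive. A sketch (the file paths are hypothetical; drop the page cache first with sync; echo 3 > /proc/sys/vm/drop_caches, otherwise a repeated read is served from RAM):

import time

def timed_read(path, block=1024 * 1024):
    # Sequentially read the whole file in 1 MiB chunks and report throughput.
    start = time.time()
    total = 0
    with open(path, 'rb') as f:
        while True:
            chunk = f.read(block)
            if not chunk:
                break
            total += len(chunk)
    secs = time.time() - start
    print("%s: %.0f MB in %.2f s = %.1f MB/s" % (path, total / 1e6, secs, total / 1e6 / secs))

timed_read('/mnt/mx300/testfile')    # hypothetical mount points
timed_read('/mnt/sandisk/testfile')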


Get this bounty!!!

#StackBounty: #performance #python-3.x #iterator #yield #next Same iterator algorithm using function and yield two times faster compare…

Bounty: 50

In the Python script below, exactly the same algorithm for creating unique combinations from a non-unique list of elements is implemented twice: once as a Python function using yield, and once as a class using __next__. The code is ready to run after copy/paste, so you can see for yourself what I am speaking about.

The same phenomenon observed for pure Python code carries over into the C code of a Python extension module created from the script by Cython, so it is not limited to Python-level code – it doesn't vanish at the C level.

The question is:

Where does the huge difference in speed of execution come from?
Is there anything that can be done to get both code variants to run at comparable speed? Is there something wrong with the class/__next__ implementation compared to the function/yield variant? To my knowledge, both are exactly the same code…

Here is the code (tweaking the number in the highlighted line changes the level of uniqueness of elements in the list the combinations are generated from, which has a huge impact on the running time):

def uniqCmboYieldIter(lstItems, lenCmbo):
    dctCounter = {}
    lenLstItems = len(lstItems)
    for idx in range(lenLstItems):
        item = lstItems[idx]
        if item in dctCounter.keys(): 
            dctCounter[item] += 1
        else: 
            dctCounter[item]  = 1
        #:if
    #:for     
    lstUniqs   = sorted(dctCounter.keys())
    lstCntRpts = [dctCounter[item] for item in lstUniqs]
    lenUniqs   = len(lstUniqs)
    cmboAsIdxUniqs = [None] * lenCmbo
    multiplicities = [0] * lenUniqs
    idxIntoCmbo, idxIntoUniqs = 0, 0

    while idxIntoCmbo != lenCmbo and idxIntoUniqs != lenUniqs:
        count = min(lstCntRpts[idxIntoUniqs], lenCmbo-idxIntoCmbo)
        cmboAsIdxUniqs[idxIntoCmbo : idxIntoCmbo + count] = [idxIntoUniqs] * count
        multiplicities[idxIntoUniqs] = count
        idxIntoCmbo  += count
        idxIntoUniqs += 1

    if idxIntoCmbo != lenCmbo:
        return

    while True:
        yield tuple(lstUniqs[idxUniqs] for idxUniqs in cmboAsIdxUniqs)

        for idxIntoCmbo in reversed(range(lenCmbo)):
            x = cmboAsIdxUniqs[idxIntoCmbo]
            y = x + 1

            if y < lenUniqs and multiplicities[y] < lstCntRpts[y]:
                break
        else:
            return

        for idxIntoCmbo in range(idxIntoCmbo, lenCmbo):
            x = cmboAsIdxUniqs[idxIntoCmbo]
            cmboAsIdxUniqs[idxIntoCmbo] = y
            multiplicities[x] -= 1
            multiplicities[y] += 1
            # print("# multiplicities:", multiplicities)


            while y != lenUniqs and multiplicities[y] == lstCntRpts[y]:
                y += 1

            if y == lenUniqs:
                break


class uniqCmboClassIter:
    # ----------------------------------------------------------------------------------------------
    def __iter__(self):
       return self

    # ----------------------------------------------------------------------------------------------
    def __init__(self, lstItems, lenCmbo):
        dctCounter = {}
        lenLstItems = len(lstItems)
        for idx in range(lenLstItems):
            item = lstItems[idx]
            if item in dctCounter.keys(): 
                dctCounter[item] += 1
            else: 
                dctCounter[item]  = 1
            #:if
        #:for     

        self.lstUniqs   = sorted(dctCounter.keys())
        self.lenUniqs   = len(self.lstUniqs)
        self.lstCntRpts = [dctCounter[item] for item in self.lstUniqs]

        self.lenCmbo        = lenCmbo
        self.cmboAsIdxUniqs = [None] * lenCmbo
        self.multiplicities = [0] * self.lenUniqs
        self.idxIntoCmbo, self.idxIntoUniqs = 0, 0

        while self.idxIntoCmbo != self.lenCmbo and self.idxIntoUniqs != self.lenUniqs:
            count = min(self.lstCntRpts[self.idxIntoUniqs], self.lenCmbo-self.idxIntoCmbo)
            self.cmboAsIdxUniqs[self.idxIntoCmbo : self.idxIntoCmbo + count] = [self.idxIntoUniqs] * count
            self.multiplicities[self.idxIntoUniqs] = count
            self.idxIntoCmbo  += count
            self.idxIntoUniqs += 1
            # print("self.multiplicities:", self.multiplicities)
            # print("self.cmboAsIdxUniqs:", self.cmboAsIdxUniqs)

        if self.idxIntoCmbo != self.lenCmbo:
            return

        self.stopIteration = False
        self.x = None
        self.y = None

        return

    # ----------------------------------------------------------------------------------------------
    def __next__(self):

        if self.stopIteration is True:
            raise StopIteration
            return

        nextCmbo = tuple(self.lstUniqs[idxUniqs] for idxUniqs in self.cmboAsIdxUniqs)

        for self.idxIntoCmbo in reversed(range(self.lenCmbo)):
            self.x = self.cmboAsIdxUniqs[self.idxIntoCmbo]
            self.y = self.x + 1

            if self.y < self.lenUniqs and self.multiplicities[self.y] < self.lstCntRpts[self.y]:
                break
        else:
            self.stopIteration = True
            return nextCmbo

        for self.idxIntoCmbo in range(self.idxIntoCmbo, self.lenCmbo):
            self.x = self.cmboAsIdxUniqs[self.idxIntoCmbo]
            self.cmboAsIdxUniqs[self.idxIntoCmbo] = self.y
            self.multiplicities[self.x] -= 1
            self.multiplicities[self.y] += 1
            # print("# multiplicities:", multiplicities)


            while self.y != self.lenUniqs and self.multiplicities[self.y] == self.lstCntRpts[self.y]:
                self.y += 1

            if self.y == self.lenUniqs:
                break

        return nextCmbo

# ============================================================================================================================================
lstSize   = 48 # 48
uniqLevel =  12 # (7 ~60% unique) higher level => more unique items in the generated list 
aList = []
from random import randint
for _ in range(lstSize):
    aList.append( ( randint(1,uniqLevel), randint(1,uniqLevel) ) )
lenCmbo = 6
percUnique = 100.0 - 100.0*(lstSize-len(set(aList)))/lstSize
print("========================  lenCmbo:", lenCmbo, 
      "   sizeOfList:", len(aList), 
      "   noOfUniqueInList", len(set(aList)), 
      "   percUnique",  int(percUnique) ) 

import time
from itertools import combinations
# itertools.combinations
# ---
# def   uniqCmboYieldIter(lstItems, lenCmbo):
# class uniqCmboClassIter: def __init__(self, lstItems, lenCmbo):
# ---
start_time = time.time()
print("Combos:%9i"%len(list(combinations(aList, lenCmbo))), " ", end='')
duration = time.time() - start_time
print("print(len(list(     combinations(aList, lenCmbo)))):",  "{:9.5f}".format(duration), "seconds.")

start_time = time.time()
print("Combos:%9i"%len(list(uniqCmboYieldIter(aList, lenCmbo))), " ", end='')
duration = time.time() - start_time
print("print(len(list(uniqCmboYieldIter(aList, lenCmbo)))):",  "{:9.5f}".format(duration), "seconds.")

start_time = time.time()
print("Combos:%9i"%len(list(uniqCmboClassIter(aList, lenCmbo))), " ", end='')
duration = time.time() - start_time
print("print(len(list(uniqCmboClassIter(aList, lenCmbo)))):", "{:9.5f}".format(duration), "seconds.")

and the timings on my box:

>python3.6 -u "nonRecursiveUniqueCombos_Cg.py"
========================  lenCmbo: 6    sizeOfList: 48    noOfUniqueInList 32    percUnique 66
Combos: 12271512  print(len(list(     combinations(aList, lenCmbo)))):   2.04635 seconds.
Combos:  1296058  print(len(list(uniqCmboYieldIter(aList, lenCmbo)))):   3.25447 seconds.
Combos:  1296058  print(len(list(uniqCmboClassIter(aList, lenCmbo)))):   5.97371 seconds.
>Exit code: 0
  [2017-05-02_03:23]  207474 <-Chrs,Keys-> 1277194 OnSave(): '/home/claudio/CgMint18/_Cg.DIR/ClaudioOnline/at-stackoverflow/bySubject/uniqueCombinations/nonRecursiveUniqueCombos_Cg.py'
>python3.6 -u "nonRecursiveUniqueCombos_Cg.py"
========================  lenCmbo: 6    sizeOfList: 48    noOfUniqueInList 22    percUnique 45
Combos: 12271512  print(len(list(     combinations(aList, lenCmbo)))):   2.05199 seconds.
Combos:   191072  print(len(list(uniqCmboYieldIter(aList, lenCmbo)))):   0.47343 seconds.
Combos:   191072  print(len(list(uniqCmboClassIter(aList, lenCmbo)))):   0.89860 seconds.
>Exit code: 0
  [2017-05-02_03:23]  207476 <-Chrs,Keys-> 1277202 OnSave(): '/home/claudio/CgMint18/_Cg.DIR/ClaudioOnline/at-stackoverflow/bySubject/uniqueCombinations/nonRecursiveUniqueCombos_Cg.py'
>python3.6 -u "nonRecursiveUniqueCombos_Cg.py"
========================  lenCmbo: 6    sizeOfList: 48    noOfUniqueInList 43    percUnique 89
Combos: 12271512  print(len(list(     combinations(aList, lenCmbo)))):   2.17285 seconds.
Combos:  6560701  print(len(list(uniqCmboYieldIter(aList, lenCmbo)))):  16.72573 seconds.
Combos:  6560701  print(len(list(uniqCmboClassIter(aList, lenCmbo)))):  31.17714 seconds.
>Exit code: 0
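For what it's worth, the gap can be reproduced without the combination algorithm at all. A minimal sketch (my own illustration, not part of the script above) that compares only the cost of resuming a generator against calling a Python-level __next__:

import timeit

def gen_count(n):
    i = 0
    while i < n:
        yield i
        i += 1

class ClassCount:
    def __init__(self, n):
        self.i, self.n = 0, n
    def __iter__(self):
        return self
    def __next__(self):
        if self.i >= self.n:
            raise StopIteration
        self.i += 1
        return self.i - 1

N = 10**6
print("yield   :", timeit.timeit(lambda: sum(gen_count(N)), number=10))
print("__next__:", timeit.timeit(lambda: sum(ClassCount(N)), number=10))

On CPython, the __next__ variant typically comes out noticeably slower, because every item costs a full Python-level method call plus instance attribute lookups, whereas resuming a generator frame is handled in C.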


Get this bounty!!!

#StackBounty: #7 #theming #database #performance Is putting content in the codebase or the database better for performance?

Bounty: 50

I recently created a website footer using a block template. Some of the footer's content was entered through the block's WYSIWYG editor (stored in the database), but other code-heavy elements were placed in the block template file (stored in code). I've noticed that the footer block loads slowly at times. Other times, the content inserted through the WYSIWYG editor loads a few seconds before the content that was inserted in the block template.

I’m curious, is storing html content in the database better for performance than storing html content in template files?

EDIT: After I posted this, I also realized that the footer is being rendered with Blocks, whereas the rest of the page is being rendered with Panels. I'm not sure whether that has any effect on its performance.


Get this bounty!!!

#StackBounty: #sql-server #performance #performance-tuning #multi-thread Identify threads with maximum difference in time

Bounty: 50

I have many queries executing in parallel in my SQL Server database. I need to find the queries where the difference in time between the longest thread and the shortest thread is high (excluding the coordinator thread). In fact, I need to find the top 10 such queries with the highest difference (among the queries executed in the span of 1 hour). How can I track this information in SQL Server?

Note: I am using SQL Server 2012.



Get this bounty!!!

#StackBounty: #python #performance #algorithm #strings #search Naive implementation of KMP algorithm

Bounty: 50

After reading this answer to the question “High execution time to count overlapping substrings”, I decided to implement the suggested Knuth-Morris-Pratt (KMP) algorithm. I used the pseudo-code listed on Wikipedia for the functions kmp_table and kmp_search.

However, when running it on some corner-cases, I have observed that it is a lot slower than the standard str.find, which apparently uses a modified Boyer-Moore-Horspool algorithm and should thus have worse worst-case performance.

The specific case I looked at is:

$ ipython -i kmp.py
In [1]: text = "A"*1000000 + "B"
In [2]: word = "A"*100 + "B"
In [3]: %timeit kmp_search(text, word)
1 loop, best of 3: 410 ms per loop
In [4}: %timeit text.find(word)
1000 loops, best of 3: 703 µs per loop

So the difference is about a factor of 1000 for this input. This is probably due to the fact that the native method is written in C while this is written in Python, but I still wanted to see if I did anything stupid here or missed any obvious optimization.

def kmp_table(word):
    table = [0] * len(word)
    position, candidate = 2, 0
    table[0] = -1

    while position < len(word):
        if word[position - 1] == word[candidate]:
            table[position] = candidate + 1
            candidate += 1
            position += 1
        elif candidate > 0:
            candidate = table[candidate]
        else:
            table[position] = 0
            position += 1
    return table


def kmp_search(text, word):
    m, i = 0, 0
    table = kmp_table(word)
    while m + i < len(text):
        if word[i] == text[m + i]:
            if i == len(word) - 1:
                return m
            i += 1
        else:
            if table[i] > -1:
                m += i - table[i]
                i = table[i]
            else:
                m += 1
                i = 0
    return len(text)
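For reference, one small and easily verified micro-optimization is to bind the lengths and the table lookup to local variables, avoiding repeated len() calls and double indexing in the hot loop. This is only my sketch of the idea; on CPython it buys a constant factor at best and will not close the gap with the C-implemented str.find:

def kmp_search_local(text, word):
    # Same algorithm and results as kmp_search above, with hot-loop lookups hoisted to locals.
    table = kmp_table(word)
    len_text, len_word = len(text), len(word)
    m = i = 0
    while m + i < len_text:
        if word[i] == text[m + i]:
            if i == len_word - 1:
                return m
            i += 1
        else:
            t = table[i]
            if t > -1:
                m += i - t
                i = t
            else:
                m += 1
                i = 0
    return len_text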


Get this bounty!!!

#StackBounty: #magento2 #javascript #performance #knockoutjs Magento 2: javascript elements load slowly

Bounty: 150

Checkout forms, the gallery on product pages, and other elements loaded by JavaScript take up to 4 seconds to load.

What can be done to make javascript elements load faster?

Update:

I’m using a custom theme which inherits from the Blank theme. I’ve not added additional JS files, only made minor changes to existing ones for translation purposes.
All caches are enabled.
It is a live site in production mode.

Pages loading times:

Category and product pages: 5 to 6 seconds.
Cart and checkout: 9 to 10 seconds. (Is that normal?)

On product pages, product images are the last to load. This can be annoying for the user. Is it possible to make them load faster / before other elements on the page?

(screenshots: product page, category page)


Get this bounty!!!

#StackBounty: #windows-10 #cpu #performance #hyper-threading How does the Windows 10 scheduler deal with Hyper Threading since Core Par…

Bounty: 50

I’m running Windows 10 (1607) on an Intel Xeon E3-1231v3 CPU (Haswell, 4 physical cores, 8 logical cores).

When I first had Windows 7 installed on this machine, I could observe that four out of eight logical cores were parked until an application needed more than 4 threads. One can check with Windows resource monitor whether cores are parked or not (example).
As far as I understand, this is an important technique to keep threads balanced across physical cores, as explained on the Microsoft website: “the Core Parking algorithm and infrastructure is also used to balance processor performance between logical processors on Windows 7 client systems with processors that include Intel Hyper-Threading Technology.”

However, after upgrading to Windows 10, I noticed that there is no core parking. All logical cores are active all the time, and when you run an application using fewer than four threads you can see how the scheduler distributes them equally across all logical CPU cores. Microsoft employees have confirmed that Core Parking is disabled in Windows 10.

But I wonder why? What was the reason for this? Is there a replacement, and if so, what does it look like? Has Microsoft implemented a new scheduler strategy that made core parking obsolete?


Appendix:

Here is an example of how core parking, introduced in Windows 7, can benefit performance (in comparison to Vista, which didn’t have the core parking feature yet). What you can see is that on Vista, HT (Hyper-Threading) harms performance, while on Windows 7 it doesn’t:

(benchmark charts: Hyper-Threading on/off, Windows Vista vs. Windows 7)

(source)

I tried to enable Core Parking as mentioned here, but what I observed was that the Core Parking algorithm isn’t Hyper-Threading aware anymore. It parked cores 4, 5, 6, 7, while it should have parked cores 1, 3, 5, 7 to avoid threads being assigned to the same physical core (Windows enumerates cores in such a way that two successive indices belong to the same physical core). Very strange. It seems Microsoft has messed this up fundamentally. And no one noticed…

Furthermore, I did some CPU benchmarks using exactly 4 threads.

CPU affinity set to all cores (Windows default):

Average running time: 17.094498, standard deviation: 2.472625

CPU affinity set to every other core (so that it runs on different physical cores, best possible scheduling):

Average running time: 15.014045, standard deviation: 1.302473

CPU affinity set to the worst possible scheduling (four logical cores on two physical cores):

Average running time: 20.811493, standard deviation: 1.405621

So there is a performance difference. And you can see that the Windows default scheduling ranks between the best and worst possible scheduling, as we would expect with a non-hyperthreading-aware scheduler. However, as pointed out in the comments, there may be other causes responsible for this, like fewer context switches, interference by monitoring applications, etc. So we still don’t have a definitive answer here.

Source code for my benchmark:

#include <stdlib.h>
#include <stdio.h>   // needed for printf
#include <Windows.h>
#include <math.h>

double runBenchmark(int num_cores) {
  int size = 1000;
  double** source = new double*[size];
  for (int x = 0; x < size; x++) {
    source[x] = new double[size];
  }
  double** target = new double*[size * 2];
  for (int x = 0; x < size * 2; x++) {
    target[x] = new double[size * 2];
  }
  #pragma omp parallel for num_threads(num_cores)
  for (int x = 0; x < size; x++) {
    for (int y = 0; y < size; y++) {
      source[y][x] = rand();
    }
  }
  #pragma omp parallel for num_threads(num_cores)
  for (int x = 0; x < size-1; x++) {
    for (int y = 0; y < size-1; y++) {
      target[x * 2][y * 2] = 0.25 * (source[x][y] + source[x + 1][y] + source[x][y + 1] + source[x + 1][y + 1]);
    }
  }
  double result = target[rand() % size][rand() % size];
  for (int x = 0; x < size * 2; x++) delete[] target[x];
  for (int x = 0; x < size; x++) delete[] source[x];
  delete[] target;
  delete[] source;
  return result;
}

int main(int argc, char** argv)
{
  int num_cores = 4;
  system("pause");  // So we can set cpu affinity before the benchmark starts 
  const int iters = 1000;
  double avgElapsedTime = 0.0;
  double elapsedTimes[iters];
  for (int i = 0; i < iters; i++) {
    LARGE_INTEGER frequency;
    LARGE_INTEGER t1, t2;
    QueryPerformanceFrequency(&frequency);
    QueryPerformanceCounter(&t1);
    runBenchmark(num_cores);
    QueryPerformanceCounter(&t2);
    elapsedTimes[i] = (t2.QuadPart - t1.QuadPart) * 1000.0 / frequency.QuadPart;
    avgElapsedTime += elapsedTimes[i];
  }
  avgElapsedTime = avgElapsedTime / iters;
  double variance = 0;
  for (int i = 0; i < iters; i++) {
    variance += (elapsedTimes[i] - avgElapsedTime) * (elapsedTimes[i] - avgElapsedTime);
  }
  variance = sqrt(variance / iters);
  printf("Average running time: %f, standard deviation: %f", avgElapsedTime, variance);
  return 0;
}
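For reproducibility, the three affinity configurations above can also be set from a script rather than by hand while the benchmark waits at its system("pause") prompt. A sketch using Python and psutil (the PID and CPU list are passed on the command line; this is illustrative, not part of the original benchmark):

# pin_affinity.py -- illustrative sketch; assumes the psutil package is installed.
import sys
import psutil

pid = int(sys.argv[1])                  # PID of the benchmark, waiting at system("pause")
cpus = [int(c) for c in sys.argv[2:]]   # e.g. 0 2 4 6 = one thread per physical core
psutil.Process(pid).cpu_affinity(cpus)  # set the affinity mask before the timed loop starts
print("Pinned PID %d to logical CPUs %s" % (pid, cpus))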


Get this bounty!!!

#StackBounty: #magento-1.7 #database #api #rest #performance Creating an API to communicate directly with Magento DB

Bounty: 50

We have been using the Magento REST API to communicate between external systems and Magento. Painfully, we have discovered that the REST API does not scale well. Specifically, requests often time out when large amounts of data are requested (say, about fifty products). We are using version 1.7.0.2.

One solution we are investigating is creating a custom API not written using Magento for the purpose of reading/writing into Magento.

  • Is this a solution typically used?
  • If so, are there any packages that simplify read/write operations on Magento EAV tables?
  • If not, are there any ways to speed up the Magento REST API?

We are in the process of scaling and speed is very important to us.


Get this bounty!!!