Multiprocessing a simple Python foreach?

Starting learn python and i'm searching to increase the speed execution for the loop START/END which are commented. Do you think multiprocessing are the best solution or there another way for increasing the performance cause only 10% of CPU are used

def merge_csv(names):
        n = 0
        while n < len(names[0]):
            _, filename["%s" %n] = os.path.split(names[0][n])
            temp_data["%s" %n] = pandas.read_csv(open(names[0][n]), sep=',|;')
            data["%s" %n] = pandas.DataFrame(temp_data["%s" %n])
            data["%s" %n].insert(1, "oround", 1)
            data["%s" %n].insert(3, "pround", 1)
            for i in range(len(data["%s" %n].mz)):
                data["%s" %n].iloc[i,1]=round(data["%s" %n].iloc[i,0],2)
                data["%s" %n].iloc[i,3]=round(data["%s" %n].iloc[i,2],1)

            liste = list(zip(data["%s" %n]["oround"], data["%s" %n]["pround"]))
            n = n+1
        liste2 = list(set([u for u in listeall]))
        merged = pandas.DataFrame(liste2)
        colonnes = ["mz", "oround", "rt", "pround", "td", "CCS", "counts"]
        m = 0
        j = 0
        for j in range(len(names[0])):
                name_data = os.path.basename(names[0][j])
                for m in range(len(colonnes)):
                    colm = colonnes[m] + name_data
                    merged.insert(len(colonnes)*j+2+m, colm, 0)
        p = 0
        n = 0
        x = 0
        y = 0
        for p in range(len(liste2)):
            couple = liste2[p]
            resultmz = data["%s" %n]["oround"].isin([couple[0]])
            listemz = list(resultmz[resultmz == True].index)
            resultrt = data["%s" %n]["pround"].isin([couple[1]])
            listert = list(resultrt[resultrt == True].index)
            if len(listemz)>0 and len(listert)>0:
                for x in range(len(listemz)) :
                        for y in range(len(listert)) :
                            if listemz[x] == listert[y]:
                                for k in range(7):
                                    merged.iloc[p,7*n+2+k] = data["%s" %n].iloc[int(listemz[x]),k]
        csvmerged = results.to_csv('merged.csv')
        return merged

I know how to start single threads in Python but I don't know how to "collect" the results.

What's the easiest way to parallelize this loop?

Read more here:

Content Attribution

This content was originally published by user16497029 at Recent Questions - Stack Overflow, and is syndicated here via their RSS feed. You can read the original post over there.

%d bloggers like this: