# -*-coding:utf-8-*-

import os
import sys
import time
# import os.path
import subprocess
#import numpy
import math
import fileManip as fM
import remotePipeline as rP

# Module-wide debug switch: the functions below print extra diagnostics when it is True.
verbose = False


def round_sig(x, sig=2):
    """Round ``x`` to ``sig`` significant digits.

    Parameters:
        x: number to round.
        sig: number of significant digits to keep (default 2).

    Returns:
        The integer 0 when ``x`` equals 0, NaN when ``x`` is NaN, ``x`` itself
        when it is infinite, and otherwise ``x`` rounded so that ``sig``
        significant digits remain.
    """
    if x == 0:
        return 0
    if math.isnan(x):
        if verbose:
            print("significant of NaN value")
        return float('nan')
    if math.isinf(x):
        # log10(inf) is inf, which cannot be turned into an integer exponent
        # (OverflowError); an infinite value has nothing to round anyway.
        return x
    return round(x, sig - int(math.floor(math.log10(abs(x)))) - 1)


def getVar(data):
    """Return the sample variance (n-1 denominator) of ``data``.

    The values are shifted by the first element before accumulating the sum
    and the sum of squares. NaN is returned when fewer than two values are
    supplied.
    """
    count = len(data)
    if count < 2:
        return float('nan')

    pivot = data[0]
    shiftedTotal = 0
    shiftedSquares = 0
    for value in data:
        delta = value - pivot
        shiftedTotal += delta
        shiftedSquares += delta * delta

    # Dividing by (count - 1) treats the data as a sample of a larger
    # population; dividing by count would give the variance of the data itself.
    return (shiftedSquares - (1. * shiftedTotal * shiftedTotal) / count) / (count - 1)


def isRelevant(l):
    """Tell whether the line ``l`` mentions one of the statistics of interest."""  # DEPRECATED?
    markers = ("Mean Gst ", "Mean Gstq ", "Mean Hs :", "Mean Hs qtl", "Mean Ht :", "Mean Ht qtl:", "Ht==0")
    return any(marker in l for marker in markers)


def getDetails(csvPath, mode="G"):
    """Extract the computation descriptors encoded in a result file name.

    The last path component is split on "_" and its second field is split on
    "-"; the resulting list is returned (the callers in this module read
    positions 1 to 4 as scenario, replicate, species and time step).

    Parameters:
        csvPath: path of the file, with or without a trailing line terminator
            (paths usually come straight from ``readlines()``).
        mode: "G" keeps every field; any other value removes the fifth field
            when it contains a "T".

    Returns:
        The list of "-"-separated fields, or None (after printing a message)
        when the file name is shorter than 5 characters.
    """
    # Only the line terminator (and a trailing "/") is removed. Blindly
    # dropping the last character corrupted the name of a path that did not
    # end with a newline, e.g. the last line of a list file.
    infos = csvPath.rstrip("\r\n").rstrip("/").split("/")[-1]
    if len(infos) < 5:
        print("INFOS not sufficient")
        return None

    parts = infos.split("_")
    if len(parts) == 3:
        infos = parts[1]
    else:
        # presumably strips a 5-character extension such as ".Rout" - TODO confirm
        infos = parts[1][:-5]

    infos = infos.split("-")
    if mode != "G" and "T" in infos[4]:
        infos.pop(4)  # SHOULD BE KEPT?

    return infos  # (ms, sc, rp, sp, st)


def addKV(d, sc, t, rep, k, v):
    """Add value to the corresponding key in the given dictionnary"""
    if "nan" in v or "NaN" in v:
        if verbose:
            print "Nan val:", sc, t, rep, k, v
    else:
        if sc not in d:
            d[sc] = {}
        if t not in d[sc]:
            d[sc][t] = {}
        if k not in d[sc][t]:
            d[sc][t][k] = {}

        d[sc][t][k][rep] = v
        # print d#TMP


def prez(ds, modeVisu="MS", infos="min", initInfos=(1, 5, 9, 10), pathInfos="", displayVar=True,
         widthMax=10000):  # Full Expanded : FE, Mid Synth : MS
    """Build a cascading text summary (scenario > time step > key) of ``ds``.

    Parameters:
        ds: nested dict ``ds[scenario][step][key][rep] -> value``. Scenario and
            step keys are strings that must parse as integers.
        modeVisu: "FE" lists every replicate value on its own line; "MS"
            prints the mean (plus the variance when ``displayVar`` is set)
            followed by the raw replicate dict.
        infos: when equal to "mini", only keys containing "Qst" or "Gst" are
            shown; any other value (including the default "min") shows all keys.
        initInfos: scenario numbers for which the settings files "e.txt",
            "sel_int.txt" and "zopt.txt" are read through ``rP.readFile`` and
            inserted once in the output.
        pathInfos: root folder under which "/Scenar<sc>/R<rep>/settings/" is
            looked up.
        displayVar: append the variance between brackets in "MS" mode.
        widthMax: maximal width of an "MS" line before the replicate dict is
            truncated with "...".

    Returns:
        The summary text (also printed when ``verbose`` is set).
    """
    # mini = just Qst, Gst, Gstq
    refreshVarz = True  # rebuild the key list at every time step
    txtPrez = ""
    varz = []

    for si in sorted(int(sKey) for sKey in ds.keys()):
        s = str(si)
        txtPrez += "SC " + s + "\n"

        # The settings files are inserted once per scenario, with the first key.
        settingsPending = si in initInfos
        if settingsPending:
            pathGI = pathInfos + "/Scenar" + s

        for t in sorted(int(tKey) for tKey in ds[s].keys()):
            st = str(t)
            txtPrez += "\tT " + st + "\n"

            if varz == [] or refreshVarz:
                varz = sorted(ds[s][st].keys())
            for k in varz:
                if k not in ds[s][st]:
                    # stale key list: force a rebuild at the next time step
                    varz = []
                    continue

                # PRINT E / ZOPT / SEL_INT
                if settingsPending:
                    firstRep = list(ds[s][st][k].keys())[0]
                    for fgi in ["e.txt", "sel_int.txt", "zopt.txt"]:
                        pathFGI = pathGI + "/R" + firstRep + "/settings/" + fgi
                        if verbose:
                            print("PFGI: %s" % (pathFGI,))
                        fo = rP.readFile(pathFGI)
                        txtPrez += "\t" * 2 + fgi + "\n\t\t\t" + "\n\t\t\t".join(fo) + "\n"
                    settingsPending = False

                isPrinted = infos != "mini" or ("Qst" in k or "Gst" in k)
                if not isPrinted:
                    continue

                if modeVisu == "FE":
                    txtPrez += "\t\t" + k + "\n"
                    for v in ds[s][st][k]:
                        # Each replicate gets its own line; without the final
                        # newline all values and the next key ran together.
                        txtPrez += "\t\t\t" + str(v) + " : " + str(ds[s][st][k][v]) + "\n"
                elif modeVisu == "MS":
                    kv = ds[s][st][k]
                    if len(kv) > 0:
                        try:
                            kvv = [round_sig(float(i), 5) for i in kv.values()]
                        except (TypeError, ValueError, OverflowError):
                            # non numeric content: fall back to zeros
                            kvv = [0] * len(kv.values())
                            if verbose:
                                print("kvv %s not string, DATA CORRUPTED (need change in py code)" % (kvv,))
                        if displayVar:
                            kvD = " [" + str(round_sig(getVar(kvv), 4)) + "]"
                        else:
                            kvD = ""
                        mainInfo = "\t\t" + k + " " + str(round_sig(sum(kvv) * 1. / len(kv), 3)) + kvD
                        if len(str(kv)) > (widthMax - len(mainInfo)):
                            detailInfo = str(kv)[:(widthMax - len(mainInfo) - 3)] + "..."
                        else:
                            detailInfo = str(kv)
                        txtPrez += mainInfo + " " + detailInfo + "\n"
                    else:
                        print("KV empty for %s %s %s ( %s )" % (s, st, k, ds[s][st]))
    if verbose:
        print(txtPrez)
    return txtPrez


# Human-readable description of each scenario, keyed by scenario number (as a
# string); outCalc appends it at the end of the scenario's row.
dTestContent = {"1": "om2=10^9, k_zopt=0.2,  Nm=10.2, noAM",
                "101": "om2=10^9, k_zopt=0.2,  Nm=10.2, noAM, E not in F",
                "2": "om2=10^9, k_zopt=0.2,   Nm=1, noAM",
                "3": "om2=10^9, k_zopt=0.2,   Nm=10.2, noAM, N=200",
                "4": "om2=50, k_zopt=0.2,   Nm=10, noAM",
                "5": "om2=50, k_zopt=0.2,  Nm=10,  noAM, E not in F",
                "6": "om2=50, k_zopt=1.5,    Nm=10,noAM",
                "7": "om2=50, k_zopt=1.5,  Nm=1, noAM",
                "8": "om2=5, k_zopt=0.2, Nm=10,  noAM",
                "9": "om2=5, k_zopt=0.2,   Nm=1, noAM",
                "10": "om2=5, k_zopt=1.5,  Nm=10, noAM",
                "11": "om2=5, k_zopt=1.5,   Nm=1, noAM",
                # stray comma removed from "om2,=50"
                "12": "om2=50, k_zopt=0.2,  Nm=10, noAM, E not in F",
                "13": "om2=50, k_zopt=1.5,  Nm=10, noAM, 60 loci",
                "14": "om2=5, k_zopt=1.5,  Nm=10, noAM, 60 loci",
                "15": "om2=50, k_zopt=1.5,  Nm=10, AM0.8, co-gradient",
                "16": "om2=5, k_zopt=1.5,  Nm=10, AM0.8, co-gradient",
                "17": "om2=50, k_zopt=1.5,  Nm=10, AM0.8, contre-gradient",
                "18": "om2=50, k_zopt=1.5, ke =2, Nm=10, AM0.8, contre-gradient, E not in F",
                "19": "om2=50, k_zopt=1.5,  Nm=10, AM0.8, co-gradient, no E in fitness",
                "1104": "nloci=41, om2: [5, 5], loci [1, 3, ... 17, 19], [21, 23, ... 37, 39], chr:0,20",
                "1105": "nloci=41, om2: [5, 5], loci [1, 3, ... 17, 19], [2, 4, ... 18, 20], chr:0,20",
                "1106": "nloci=41, om2: [50, 50], loci [1, 3, ... 17, 19], [21, 23, ... 37, 39] chr:0,20",
                "1107": "nloci=41, om2: [50, 50], loci [1, 3, ... 17, 19], [2, 4, ... 18, 20] chr:0,20",
                "1108": "nloci=41, om2: [10**9, 10**9], loci [1, 3, ... 17, 19], [21, 23, ... 37, 39] chr:0,20",
                "1109": "nloci=41, om2: [10**9, 10**9], loci [1, 3, ... 17, 19], [2, 4, ... 18, 20] chr:0,20"
                }

#gstqT1 = "T1 -> Mean Gstq : "
# Short column titles used by outCalc, keyed by the statistic name (without
# its trait prefix). The "Mean Gstq : " entry used to be listed twice.
equivTitle = {
    "Mean Gst : ": "Gst",
    "Mean Gstq : ": "Gstq",
    "Mean Qst": "Qst",
    "Addval Mean North": "Ad M N",
    "Addval Mean South": "Ad M S",
    "Addval Var North": "Ad V N",
    "Addval Var South": "Ad V S",
    "Mean(Addval Var)": "Ad V",
    "Addval Mean Glob": "Ad M",
    "Mean Gstq a : ": "Gstq_a",
    "Mean Gstq b : ": "Gstq_b",
    "Mean Gstq c : ": "Gstq_c",
    "HT==0": "Null Ht",
    "Ginter[ 1 , 2 ]": "Gb corr",
    "Gintra[ 1 , 2 ]": "Gw corr",
    "Pinter[ 1 , 2 ]": "Pb corr",
    "Pintra[ 1 , 2 ]": "Pw corr",
}


def txtFromRepList(vrep, displayVar=True, interm="\t", replaceNA=False, nrep=-1):
    """Format the replicate values of one statistic as "mean [variance]".

    Parameters:
        vrep: dict ``rep -> value`` where each value is a string. Values
            containing "NA" or parsing to NaN are considered missing.
        displayVar: append the variance between brackets after the mean.
        interm: separator appended at the end of the returned text.
        replaceNA: when True, missing values are left out and the list is
            then padded with zeros up to ``nrep`` values.
        nrep: expected number of replicates (only used with ``replaceNA``).

    Returns:
        ``mean + variance part + interm``. When no usable value exists the
        mean is "X", or "0" (with " [0.]" if ``displayVar``) when
        ``replaceNA`` is set.
    """
    # Defined before the test on vrep so that an empty dict reaches the
    # fallback branch below instead of raising UnboundLocalError.
    vrepNoNA = []

    if len(vrep) > 0:
        for i in vrep.values():
            if "NA" not in i and not math.isnan(float(i)):
                vrepNoNA.append(round_sig(float(i), 7))
            elif not replaceNA and displayVar:
                # TODO (original author): logic issue here, clarify why a
                # missing value counts as 0 only when the variance is shown.
                vrepNoNA.append(0)
            # otherwise the missing value is simply left out
        if replaceNA and len(vrepNoNA) < nrep:
            vrepNoNA += [0] * (nrep - len(vrepNoNA))

    if len(vrepNoNA) > 0:
        kvvs = str(round_sig(sum(vrepNoNA) * 1. / len(vrepNoNA), 3))
        if displayVar:
            varPart = " [" + str(round_sig(getVar(vrepNoNA), 4)) + "]"
        else:
            # NOTE(review): the original carried an unreachable branch here
            # meant to report "(n NA ignored)"; it never produced output.
            varPart = ""
    else:
        varPart = ""
        kvvs = "X"
        if replaceNA:
            kvvs = "0"
            if displayVar:
                varPart = " [0.]"
    return kvvs + varPart + interm


def outCalc(ds, addname, prefix="", displayVar=False):
    """Write a tab-separated table of averaged statistics, one row per scenario.

    The table is easy to paste into a spreadsheet. Columns are the statistics
    listed below at three time steps (1, tInterm, tEnd), first per trait
    ("T1", "T2"), then the trait-independent ones.

    Parameters:
        ds: nested dict ``ds[scenario][step][key][rep] -> value`` with string
            keys; scenario keys must parse as integers.
        addname: path of the file to write.
        prefix: text written before the table.
        displayVar: forwarded to ``txtFromRepList`` (adds the variance).
    """
    txtout = prefix + "\n" * 2
    tInterm = 960
    tEnd = 19200  # 1000
    steps = (1, tInterm, tEnd)

    perTraitKeys = ["Mean Qst", "Mean Gstq : ", "Addval Mean Glob", "Mean(Addval Var)"]
    directKeys = ["Mean Gst : ", "Ginter[ 1 , 2 ]", "Gintra[ 1 , 2 ]", "Pinter[ 1 , 2 ]", "Pintra[ 1 , 2 ]"]
    # (gstqT1, 1), (gstqT1, tInterm), (gstqT1, tEnd),
    # ("HT==0", 1), ("HT==0", tInterm),  ("HT==0", tEnd),
    focusTraits = ["T1", "T2"]

    # Each column is a (key, step) pair; the steps of one key are adjacent.
    orderColumns = []
    for t in focusTraits:
        orderColumns += [(t + " -> " + key, step) for key in perTraitKeys for step in steps]
    orderColumns += [(key, step) for key in directKeys for step in steps]

    # header display
    txtout += "\nStep\t"
    for k in orderColumns:
        txtout += str(k[1]) + "\t"
    txtout += "\n#Id\t"
    for k in orderColumns:
        kshort = k[0]
        traitInfo = ""
        if "->" in k[0]:
            if len(focusTraits) > 1:
                traitInfo = k[0].split(" ->")[0] + " "
            # drop the "<trait> -> " prefix whatever the length of the trait name
            kshort = k[0].split(" -> ", 1)[1]
        txtout += traitInfo + equivTitle[kshort] + "\t"
    txtout += "Test\n"

    interm = "\t"
    lign = 0
    for si in sorted(int(sKey) for sKey in ds.keys()):
        if lign % 5 == 0:
            # visual separator every five scenarios
            txtout += "- -- " * 20 + "\n"
        s = str(si)
        txtout += "#" + s + "\t"  # + dTestContent[s] + "\t"
        for k in orderColumns:
            if str(k[1]) in ds[s] and k[0] in ds[s][str(k[1])]:
                kv = ds[s][str(k[1])][k[0]]
                # k is a (key, step) tuple: the substring test has to be made
                # on the key, testing the tuple itself was always False.
                replNA = "Pop" in k[0]
                txtout += txtFromRepList(kv, displayVar, interm, replaceNA=replNA, nrep=len(kv))
            else:
                txtout += interm
        if s in dTestContent:
            txtout += dTestContent[s]
        elif verbose:
            print("%s not in dTestContent:  %s" % (s, dTestContent.keys()))
        txtout += "\n"
        lign += 1

    # the context manager closes the file even if the write fails
    with open(addname, "w") as f:
        f.write(txtout)


def outSplit(ds, addname, displayVar=True, species=("S1",)):
    """Write separate tab-separated files with averaged statistics by time step.

    Only one scenario of ``ds`` is treated. The following files are written:

    * ``addname + "_" + name`` for each entry of ``valByFile`` (Gcorr, Pcorr,
      neutral, qtl, Demography) that matches at least one key: one row per
      time step, one column per key;
    * ``addname + "_Table_<...>_byPop"`` for each key containing "ADDMEAN" or
      "ADDVAR": one row per population, one column per time step;
    * ``addname + "_quantitative_byPop"`` only when ``displayQuantByPop`` is
      switched on in the code.

    A summary of the number of replications found is printed at the end.

    Parameters:
        ds: nested dict ``ds[scenario][step][key][rep] -> value`` with string
            keys; scenario and step keys must parse as integers.
        addname: common prefix of the output paths.
        displayVar: forwarded to ``txtFromRepList`` (adds the variance).
        species: not used by this function.

    Exits the interpreter (status 0) when ``ds`` is empty.
    """
    interm = "\t"  # column separator in files
    valByFile = {"Gcorr": ["Ginter*", "Gintra*"],  # split values into files, * can be used at the end of generic expr
                 "Pcorr": ["Pinter*", "Pintra*"],
                 # "Add":[], #NOT BY TIME STEP BUT BY POP AND TIME STEP
                 "neutral": ["Mean Gst : ", "Mean Hs : ", "Mean Ht : "],
                 # "Genepop_T*": [gstqT1, "Mean Qst", "theta_b", "theta_w", "Mean Hs qtl : ", "Mean Ht qtl: ", "Qst_a", "Qst_b"],
                 "qtl": ["Mean Gstq : ", "Mean Gstq a : ", "Mean Gstq b : ", "Mean Qst", "theta_b", "theta_w", "Mean Hs qtl : ", "Mean Ht qtl: ",
                         "Qst_a", "Qst_b",
                         "Vw_a", "Vw_b", "Vw_c", "Vb_a", "Vb_b", "Vb_c"],
                 "Demography": ["Nind*"]}

    transcriptedKeys = {"Mean Gst : ": "Gst", "Mean Qst": "Qst", "Mean Gstq : ": "Gst_q",
                        "Mean Hs : ": "Hs", "Mean Ht : ": "Ht", "Mean Ht qtl: ": "Ht_q", "Mean Hs qtl : ": "Hs_q",
                        "Ginter*": "CCG_b", "Pinter*": "CCP_b", "Gintra*": "CCG_w", "Pintra*": "CCP_w",
                        "Nind*": "Pop "}  # Change the name used in the header of the splitted files

    if verbose:
        print("ds %s" % (ds.keys(),))

    sc = [int(s) for s in ds.keys()]
    if len(sc) > 1:
        print("Number of scenarii different from 1, will treat only the first one")
    elif len(sc) < 1:
        print("Dictionary empty. Can't compute statistics.")
        sys.exit(0)
    si = sc[0]
    s = str(si)
    # GET VALUES CORRESPONDING TO THE SCENARIO
    monoDico = ds[s]

    # GET TIMES CORRESPONDING TO THE SCENARIO
    ti = sorted(int(i) for i in monoDico.keys())

    # largest number of replicates among the keys of one (arbitrary) time step
    nrep = max([len(i) for i in monoDico[list(monoDico.keys())[0]].values()])

    ## GET TRAITS LIST VIA KEY INFORMATION
    lTraits = []
    for k in monoDico[str(ti[0])].keys():
        if " -> " in k:
            t = k.split(" -> ")[0].split(" ")[-1]  # IF FORMAT Sn Tn instead of Tn
            if t not in lTraits:
                lTraits += [t]
    lTraits.sort()

    # list all files from their generic expression
    valByFileExpanded = {}
    for v in valByFile:
        if "T*" in v:
            for t in lTraits:
                valByFileExpanded["_".join(v.split("_")[:-1]) + "_" + t] = valByFile[v]
        else:
            valByFileExpanded[v] = valByFile[v]
    if verbose:
        print("vbfe %s" % (valByFileExpanded,))

    # every key met at any time step, in order of first appearance
    allKeys = []
    for t in ti:
        for k in monoDico[str(t)].keys():
            if k not in allKeys:
                allKeys += [k]

    # Check number of rep
    minNREP, maxNREP = -1, -1
    dNREP = {}

    # GENERATION OF F1:Gcorr, F2:Pcorr, F4:GENEPOP, F4T:GENEPOPT

    for filev in valByFileExpanded:
        selectedTrait = ""
        if "_trait" in filev:
            selectedTrait = filev.split("_")[-1]

        txtout = ""
        # get real keys from pattern in valByFile[filev]
        wantedKeys = []
        for targetKey in valByFileExpanded[filev]:
            # ONLY WILDCARD: AT THE END
            pattern = targetKey[:-1] if "*" in targetKey else targetKey
            for k in allKeys:
                if pattern in k and k not in wantedKeys:
                    wantedKeys += [k]
        if verbose:
            print("wk %s %s" % (filev, wantedKeys))

        wantedKeys20 = []
        checkLt = {}
        for k in wantedKeys:
            if (" -> " in k and (selectedTrait == k.split(" -> ")[0].split(" ")[-1] or "qtl" in filev)) or " -> " not in k:
                wantedKeys20 += [k]
                if k[0] == "T" and "Gstq " in k and "Gstq :" not in k:  # detect ltype Gst
                    checkLt.setdefault(k[:2], []).append(k)
        if verbose:
            print("check lt %s" % (checkLt,))

        wantedKeys2 = []
        for k in wantedKeys20:
            if "Gstq " in k and "Gstq :" not in k:
                # A locus-type Gstq is kept only when its trait has several of
                # them. Keys without a "T" prefix are absent from checkLt:
                # .get() avoids the KeyError they used to raise.
                if len(checkLt.get(k[:2], [])) > 1:
                    wantedKeys2 += [k]
                elif verbose:
                    print("ignore %s" % (k,))
            else:
                wantedKeys2 += [k]

        # TODO rehabilit trait split into several files (first test of the previous condition),
        # need add a T* somewhere in file def at the beginning of the function

        if wantedKeys2 == []:
            if verbose:
                print("No row for filev %s" % (filev,))
        else:
            wantedKeys2.sort()
            # header line
            txtout += "step" + interm
            for k in wantedKeys2:
                if "_T" in filev:
                    keySplit = k.split(" -> ")
                    keyName = keySplit[-1]
                    keyTrait = str(keySplit[:-1])[2:-2]
                    if keyTrait == selectedTrait:
                        if keyName in transcriptedKeys:
                            keyName = transcriptedKeys[keyName]
                        txtout += keyName + interm
                    else:
                        print("trait different %s %s" % (keyTrait, selectedTrait))
                elif "_AllT" in filev:
                    keySplit = k.split(" -> ")
                    keyName = keySplit[-1]
                    keyTrait = str(keySplit[:-1])[2:-2]
                    if keyName in transcriptedKeys:
                        keyName = transcriptedKeys[keyName]
                    txtout += "[" + keyTrait + "] " + keyName + interm
                else:
                    label = k
                    if k in transcriptedKeys:
                        label = transcriptedKeys[k]
                    else:
                        # wildcard entries: replace the matching prefix only
                        for tk in transcriptedKeys:
                            if "*" == tk[-1]:
                                tkBase = tk[:-1]
                                if k[:len(tkBase)] == tkBase:
                                    label = transcriptedKeys[tk] + k[len(tkBase):]
                                    break
                    txtout += label + interm
            txtout += "\n"

            # one row per time step
            for t in ti:
                txtt = str(t)
                txtout += txtt + interm
                for k in wantedKeys2:
                    if txtt in monoDico and k in monoDico[txtt]:
                        kv = monoDico[txtt][k]
                        dNREP[(txtt, k)] = len(kv)
                        if minNREP == -1 or len(kv) < minNREP:
                            minNREP = len(kv)
                        if maxNREP == -1 or len(kv) > maxNREP:
                            maxNREP = len(kv)
                        replNA = "Nind" in k
                        txtout += txtFromRepList(kv, displayVar, interm, replaceNA=replNA, nrep=nrep)
                    else:
                        txtout += "-" + interm
                txtout += "\n"
            with open(addname + "_" + filev, "w") as f:
                f.write(txtout)

    # GENERATION OF F3T*:AddT* Different format, done separately
    keywordsADD = ["ADDMEAN", "ADDVAR"]
    displayQuantByPop = False
    txtTADD = ""
    dFullADDbyPop = {}

    for t in ti:
        txtt = str(t)
        txtTADD += "#Step" + txtt + "\n"
        wantedAddKeys = []
        for ref_k in keywordsADD:
            for effective_k in monoDico[txtt]:
                if ref_k in effective_k:
                    wantedAddKeys += [effective_k]
        wantedAddKeys.sort()

        dADDbyPop = {}
        for eff_k in wantedAddKeys:
            # SPLIT DATA BY POP
            effS = eff_k.split("_")  # FORMAT EFF_F: 2-ADDVAR-T1_12
            if len(effS) == 2:
                k, pop = effS
                if int(pop) not in dADDbyPop:
                    dADDbyPop[int(pop)] = {}
                dADDbyPop[int(pop)][k] = monoDico[txtt][eff_k]
                if k not in dFullADDbyPop:
                    dFullADDbyPop[k] = {}
                if int(pop) not in dFullADDbyPop[k]:
                    dFullADDbyPop[k][int(pop)] = []
                dFullADDbyPop[k][int(pop)] += [monoDico[txtt][eff_k]]
            else:
                print("can't parse EFFADD")

        # zero-pad the population ids so that they sort correctly as text
        dADDbyPopTxt = {}
        if dADDbyPop != {}:
            lengthMaxCellText = len(str(max([int(i) for i in dADDbyPop.keys()])))
            for p in dADDbyPop:
                lengthCellText = len(str(p).split()[0])
                pt = "0" * (lengthMaxCellText - lengthCellText) + str(p).split()[0]
                dADDbyPopTxt[pt] = dADDbyPop[p]

        if dADDbyPopTxt != {}:
            # DOES NOT WORK, NEED NUMBER TO SORT, which will not be displayed
            addkey_list = sorted(dADDbyPopTxt[list(dADDbyPopTxt.keys())[0]].keys())
            txtTADD += "pop" + interm
            for k in addkey_list:
                txtTADD += k.split("-")[1] + interm
            txtTADD += "\n"

            for p in sorted(dADDbyPopTxt.keys()):
                txtTADD += str(p) + interm
                for k in addkey_list:
                    vrep = dADDbyPopTxt[p][k]
                    dNREP[(txtt, k)] = len(vrep)
                    if minNREP == -1 or len(vrep) < minNREP:
                        minNREP = len(vrep)
                    if maxNREP == -1 or len(vrep) > maxNREP:
                        maxNREP = len(vrep)
                    txtTADD += txtFromRepList(vrep, displayVar, interm)
                txtTADD += "\n"
            txtTADD += "\n"
        else:
            print("addpop empty for step %s" % (t,))

    if displayQuantByPop:
        with open(addname + "_quantitative_byPop", "w") as f:
            f.write(txtTADD)

    # one table (population x time step) per additive-value key
    keywordsADDsolo = ["ADDMEAN", "ADDVAR"]
    for kas in keywordsADDsolo:
        wantedAddKeys = [effective_k for effective_k in dFullADDbyPop if kas in effective_k]
        for ak in wantedAddKeys:
            txtSoloTable = "\t" + "\t".join([str(t) for t in ti]) + "\n"
            for p in sorted(dFullADDbyPop[ak].keys()):
                valP = dFullADDbyPop[ak][p]
                ltxtP = [txtFromRepList(i, False, interm="") for i in valP]
                txtSoloTable += str(p) + "\t" + "\t".join([str(i) for i in ltxtP]) + "\n"

            with open(addname + "_Table_" + "-".join(ak.split("-")[1].split()) + "_byPop", "w") as f:
                f.write(txtSoloTable)

    if minNREP == maxNREP:
        print("[INFO] For every synthetic value, " + str(minNREP) + " replications have been found.")
    else:
        print(fM.hilite("[WARNING] The number of replications differed between some synthetic values (" + str(minNREP) + " to " + str(maxNREP) + " replications.)", False, False))
        print("Detail about the number of replications by value for non max rep value:")
        # Sorted so that the last element really is the maximum to leave out;
        # the order of a set is not guaranteed.
        repCounts = sorted(set(dNREP.values()))
        for i in repCounts[:-1]:
            print("%s replications" % (i,))
            for j in dNREP:
                if dNREP[j] == i:
                    print("\t%s" % (j,))


def cleanDico(d):
    """Find and eliminate unwanted data: incomplete and duplicated replicates.

    For every time step greater than 10, the values of all keys are gathered
    per replicate. A replicate whose values equal those of a replicate already
    seen at the same step is a duplicate; a replicate missing from at least
    one key is not retained as an original.

    Parameters:
        d: nested dict ``d[scenario][step][key][rep] -> value`` whose step
            keys are strings of digits.

    Returns:
        A new dictionary restricted to the original replicates when at least
        one step greater than 10 was examined, ``d`` itself otherwise.
    """
    d2 = {}
    dOriRep = {}  # scenario -> [number of originals, number of duplicates]
    dOrig, lRep = {}, []  # originals by scenario / (scenario, rep) duplicates

    isFarTime = False

    for sc in d:
        d2[sc] = {}
        dOriRep[sc] = [0, 0]
        dOrig[sc] = []
        # CHECK WHICH REP ARE DUPLICATES
        for t in d[sc]:
            if not t.isdigit():
                print("ERROR: %s not a number" % (t,))
            k0 = list(d[sc][t].keys())[0]
            if verbose:
                print("k %s" % (k0,))
            lk = []
            for rep in d[sc][t][k0].keys():
                if int(t) > 10 and (sc, rep) not in lRep:
                    isFarTime = True
                    # Explicit loop: the failing key is known without relying
                    # on the Python 2 leak of list-comprehension variables.
                    lknew = []
                    for key in d[sc][t].keys():
                        try:
                            lknew.append(d[sc][t][key][rep])
                        except (KeyError, IndexError, TypeError):
                            if "Nind" not in key:  # if patch empty, Nind not present but not a bug
                                if verbose:
                                    print("%s %s %s %s bug" % (sc, t, key, rep))
                                    print("full ref: d =  %s" % (d,))
                            elif verbose:
                                print("Nind %s not found (pop empty?)" % (key,))
                            lknew = []
                            break

                    if verbose:
                        print("lkn %s" % (lknew,))
                    if lknew in lk:
                        if verbose:
                            print("rep  %s is a duplicate" % (rep,))
                        lRep += [(sc, rep)]
                        dOriRep[sc][1] += 1
                    elif rep not in dOrig[sc] and lknew != []:
                        lk += [lknew]
                        dOrig[sc] += [rep]
                        dOriRep[sc][0] += 1

        # KEEP ONLY ORIGINAL REP
        for t in d[sc]:
            d2[sc][t] = {}
            for rep in dOrig[sc]:
                for k in d[sc][t]:
                    if k not in d2[sc][t]:
                        d2[sc][t][k] = {}
                    if rep in d[sc][t][k]:
                        d2[sc][t][k][rep] = d[sc][t][k][rep]

    if not isFarTime:
        # no step beyond 10: nothing was compared, keep the data untouched
        return d

    if len(lRep) > 0:
        print("[WARNING] After clean :  %s rep not considered :  %s" % (len(lRep), lRep))
    if verbose:
        for sc in dOriRep:
            print("%s %s" % (sc, dOriRep[sc]))
    return d2


def getStats(folder, typ="reg", dS=None, dE=None, prefixSh="", species=("S1",), isRemote=False, username=""):  # reg or PHENO or GENES
    """Go through the result files of ``folder`` and gather their statistics.

    A shell script ("getRout.sh", or "getRout_remote.sh" when ``isRemote``)
    is called with a list-file name; that file is then read as one result
    path per line. Each result file is read with ``rP.readFile`` and parsed
    either as a PHENO output (path containing "-T" or folder containing
    "PHENO") or as a GENES output. Population sizes are then added from the
    files listed by "getAgeCsv.sh" / "getAgeCsv_remote.sh", and identical
    replicates are removed with ``cleanDico``.

    Parameters:
        folder: folder holding the results.
        typ: "standalone" changes the location of the list files and of the
            treatment scripts.
        dS: statistics dictionary to complete (a new one is created when
            omitted); filled through ``addKV``.
        dE: dictionary of failures to complete (a new one when omitted);
            receives "Allocation" for runs reporting "cannot allocate".
        prefixSh: path prefix of the getRout script.
        species: list of species; with more than one, keys are prefixed with
            the species name.
        isRemote: use the "_remote" variant of the shell scripts.
        username: passed as last argument to the shell scripts.

    Returns:
        The tuple ``(dStats, dExcuses)``.
    """
    # Fresh containers at every call: the former {} defaults were shared and
    # kept accumulating the results of previous calls.
    dStats = {} if dS is None else dS
    dExcuses = {} if dE is None else dE

    cStamp = int(time.time()) % 1000000

    oRegList = "MERGE_" + str(cStamp) + ".txt"
    if typ == "standalone":
        oRegList = "outputs/" + oRegList
    cwd = os.getcwd()
    if "NonReg" in cwd or "VisuMTP" in cwd:
        prefixSh += "../"

    if verbose:
        print("CD: %s" % (os.getcwd(),))
    script = "getRout.sh"
    if isRemote:
        script = "getRout_remote.sh"
    subprocess.call(["sh", prefixSh + script, folder, oRegList, username])

    with open(oRegList, "r") as fcsv:
        lcsv = fcsv.readlines()  # change "csv" name by rout for consistency

    for csv in lcsv:
        csv2 = csv.rstrip()
        lrez = rP.readFile(csv2, prefix=folder)

        if "-T" in csv2 or "PHENO" in folder:  # PHENO ANALYSIS
            # the raw line is passed on purpose: getDetails handles the line ending
            dtrout = getDetails(csv, mode="P")
            sc, rep, sp, t = dtrout[1], dtrout[2], dtrout[3], dtrout[4]
            for l in lrez:
                if "[1] " in l and "[F]" in l:
                    ls = l.split(" : ")
                    if len(ls) == 3:
                        k, v = ls[1:]
                        if len(species) > 1:
                            k = sp + " " + k
                        # keep at most 6 leading characters, plus the exponent if any
                        if "e" in v:
                            va = v[:min(6, v.find('"'))] + v[v.find("e"):v.find('"')]
                        else:
                            va = v[:min(6, v.find('"'))]
                        addKV(dStats, sc, t, rep, k, va)

        else:  # GENES ANALYSIS
            dtrout = getDetails(csv, mode="G")
            sc, rep, sp, t = dtrout[1], dtrout[2], dtrout[3], dtrout[4]
            for i, l in enumerate(lrez):
                if "[1]" in l:
                    if '"' in l and isRelevant(l):
                        k = l.split('"')[1]
                        if len(species) > 1:
                            k = sp + " " + k
                        # The value is the last field of the following line; a
                        # title on the last line (or followed by a blank line)
                        # has no value and is skipped instead of raising.
                        valueFields = lrez[i + 1].split() if i + 1 < len(lrez) else []
                        if valueFields:
                            addKV(dStats, sc, t, rep, k, valueFields[-1][:6])
                # elif "argSelected" in l and "print(argSelected)" not in l and "Mean Qst a" not in l and "Gst[argSelected]" not in l and "Ht" not in l and "Hs" not in l:
                #    k = l[8:-2]
                #    v = lrez[i + 1].split()[-1]
                #    addKV(dStats, sc, t, rep, k, v) # NEED RECAST WITH ENHANCED QTL MANAGEMENT
                elif "cannot allocate" in l:
                    if sc not in dExcuses:
                        dExcuses[sc] = {}
                    if t not in dExcuses[sc]:
                        dExcuses[sc][t] = {}
                    dExcuses[sc][t][rep] = "Allocation"

    # DIFFERENT SPIRIT: GET POPULATION SIZE
    wantPop = True
    if wantPop:
        oCsvList = "ADDCSV_" + str(cStamp) + ".txt"
        if typ == "standalone":
            treatDir = folder + "/../data_processing/"
            oCsvList = folder + "/pRocessed/" + oCsvList
        else:
            treatDir = folder + "/../../treatment/"

        if verbose:
            print("CD: %s" % (os.getcwd(),))
        script = "getAgeCsv.sh"
        if isRemote:
            script = "getAgeCsv_remote.sh"

        cmdPop = ["sh", treatDir + "Sh_scripts/" + script, folder, oCsvList, username]
        print("cmdPop %s" % (cmdPop,))
        subprocess.call(cmdPop)
        # closed before the removal below (the handle used to stay open)
        with open(oCsvList, "r") as fcsv:
            lcsv = fcsv.readlines()
        print("lcsv %s" % (lcsv,))
        for csv in lcsv:
            # file name expected as <species>_<step>_...
            sp, ti = csv.split("/")[-1].split("_")[0], csv.split("/")[-1].split("_")[1]
            rep = "0"
            sc = "0"
            if "/R" in csv:
                repBeg = csv.find("/R") + 2
                repEnd = csv.find("/", repBeg)
                rep = csv[repBeg:repEnd]

            # get pop by cell in the current filev
            popByCell = fM.parseAgeCsv(csv, splitAge=False)
            print("pbc %s" % (popByCell,))
            lengthMaxCellText = len(str(max([int(i) for i in popByCell.keys()])))
            for cell in popByCell:
                # zero-padded cell id so that the keys sort correctly as text
                lengthCellText = len(str(cell))
                txtcell = "0" * (lengthMaxCellText - lengthCellText) + str(cell)
                currKey = "Nind" + str(txtcell)
                if len(species) > 1:
                    currKey = sp + " " + currKey
                addKV(dStats, sc, ti, rep, currKey, str(int(popByCell[cell])))

        isRmInterm = True
        if isRmInterm:
            os.remove(oCsvList)

    # remove identical results (when all interest values are identical)
    cleanDuplicates = True
    if cleanDuplicates:
        dStats = cleanDico(dStats)

    return dStats, dExcuses


if __name__ == "__main__":
    # Stand-alone run: gather the statistics of the target folder and write
    # the split summary files with a time-stamped prefix.
    # target = "Xi5025"
    target = "../METAPOP/outputs"
    stats, excuses = getStats(target)
    # stats, excuses = getStats("PHENO_579585", "PH", stats, excuses)
    # stats, excuses = getStats("GENES_449483", "GEN", stats, excuses)

    stamp = int(time.time()) % 1000000
    # outCalc(stats, "MERGEOUT_" + str(stamp) + ".txt", prefix=prez(stats, pathInfos=target), displayVar=True)
    outSplit(stats, "TEST_%d" % stamp, displayVar=True)
