#! /usr/bin/Rscript
source("fetching_functions.r")

# Positional command-line arguments:
#   1. species, 2. step, 3. input directory (becomes the working directory).
# The whole argument vector is also handed to getArgselected() and
# getArgLtselected() (not defined in this file; presumably provided by
# fetching_functions.r) whose results are used further down to index the
# per-locus statistics.
args <- commandArgs(TRUE)
if (length(args) < 3)
{
    # Without this check a missing argument only surfaces later as an
    # opaque failure of setwd(NA).
    stop("Expected at least 3 arguments: <species> <step> <input directory>",
         call. = FALSE)
}
argSpecies <- args[1]
print(argSpecies)
argStep <- args[2]
print(argStep)
argInput <- args[3]
print(argInput)


argSelected <- getArgselected(args)
argLtSelected <- getArgLtselected(args)

print(argSelected)
print(argLtSelected)

# All file names used below ("ploidy.csv", the listed .csv files) are
# relative to the input directory.
setwd(argInput)

##################################################
# Construct a general list of files of interest  #
#  = .csv, 3 fields separated by "_"             #
##################################################
# Start from every entry of the working directory
filesList <- list.files()
print(filesList)

# findCsv() returns a list; its 1st, 2nd and 3rd elements are kept as `s`,
# the replacement for `filesList`, and `sublists` respectively.
fileAndsub <- findCsv(filesList, TRUE)
s <- fileAndsub[[1]]
filesList <- fileAndsub[[2]]
sublists <- fileAndsub[[3]]

##############  Ploidy  #################
#   Fixed file, one line per species    #
#########################################
# Every line of ploidy.csv is split on single spaces: the first field is used
# as the entry name, the remaining fields are converted to numbers.
handle <- file("ploidy.csv", open = "r")
flines <- readLines(handle)
# The connection was opened explicitly, so it has to be closed explicitly.
close(handle)
flines <- strsplit(flines, split = " ")
ploidy <- lapply(flines, `[`, -1)
ploidy <- lapply(ploidy, as.numeric)
names(ploidy) <- lapply(flines, `[`, 1)
#########################################

#Checking consistency of arguments
checkConsistencyGeno(filesList, argSpecies, argStep)

###############################################################################
###############################################################################
#########                         Analysis                            #########
###############################################################################
###############################################################################

#Calculates a H for a given locus
#(H = 1 - Sigma{i=1 to i=nAll}[ P_i^2 ]) Nils Ryman
#
# input_mat : allele observations for the locus (matrix or vector); every
#             element counts as one observation.
# all_list  : the alleles to tally.
# Returns a single number: 1 minus the sum of the squared frequencies, each
# frequency being an allele count divided by the number of observations.
calc_h <- function(input_mat, all_list)
{
    v <- as.vector(input_mat)
    # One count per allele. Counting allele by allele keeps the result
    # correct when all_list holds a single allele: the former
    # sapply()/as.matrix() construction then produced one row per observation
    # instead of one row per allele, giving H = 1 - 1/n instead of 0.
    eff <- vapply(all_list, function(allele) sum(v == allele), numeric(1),
                  USE.NAMES = FALSE)
    P <- eff / length(v)
    1 - sum(P^2)
}


# Genotype table of the requested species/step: space separated, no header.
genomes <- read.table(filesList[[argSpecies]][[argStep]], sep = " ",
                      header = FALSE)

# Fail early with a clear message instead of an obscure "NA/NaN argument"
# error when the species has no line in ploidy.csv.
if (is.null(ploidy[[argSpecies]]))
{
    stop("No ploidy entry found for species: ", argSpecies, call. = FALSE)
}
# Column limits of the loci: cumulated ploidy values, starting from 0
locLims <- c(0, cumsum(ploidy[[argSpecies]]))

#Get pop numbers index
popIndex <- genomes[, 1]
#Get pops ids
pops <- unique(popIndex)
#Make a mask to select inds for each pops
splitter <- lapply(pops, "==", popIndex)

#Isolate genome columns (the first two columns are dropped)
genomes <- as.matrix(genomes[, c(-1, -2)])

#Gives the col numbers of each locus
locus <- lapply(seq_len(length(locLims) - 1),
                function(i) (locLims[i] + 1):locLims[i + 1])

#Get all possible alleles for each locus
loc_all_list <- lapply(locus, function(x) unique(c(genomes[, x])))
# Preview: at most the first 20 rows and 20 columns
dimR <- min(20, dim(genomes)[1])
dimC <- min(20, dim(genomes)[2])
print(genomes[seq_len(dimR), seq_len(dimC)])
print(loc_all_list)
print(length(loc_all_list))

print(locus)

#####################
### RECAST CALC H ###
#####################
# Row-wise H = 1 - sum(frequency^2) from a matrix of counts.
#
# eff : numeric matrix of counts (calc_h_oneshot_BIS passes one row per locus
#       and one column per allele code).
# Returns one value per row of eff.
h_from_eff <- function(eff)
{
	# Every row is scaled by the total of the FIRST row, i.e. all rows are
	# taken to share the same total count.
	total <- sum(eff[1, ])
	squared_freq <- (eff / total)^2
	1 - rowSums(squared_freq)
}

# Per-locus Ht, Hs and Gst for a diploid genotype matrix, with printed
# summaries for the selected loci and for the remaining ones.
#
# input_m : genotype matrix, one row per individual and two consecutive
#           columns per locus (columns 2l-1 and 2l belong to locus l).
#           Allele code a is counted in bin a + 1, so codes are expected to be
#           integers between 0 and the largest value found in loc_all_list.
#
# Reads from the calling script: loc_all_list, pops, popIndex, argSpecies,
# argSelected, argLtSelected; calls h_from_eff().
#
# Prints intermediate results and returns Ht (one value per locus).
calc_h_oneshot_BIS <- function(input_m)
{
	ptm <- proc.time()

	geno <- as.matrix(input_m)
	nind <- nrow(geno)
	nloc <- ncol(geno)	# genotype columns = 2 * number of loci
	# One bin per allele code 0..max code over all loci
	nbins <- max(0, unlist(loc_all_list)) + 1

	# Allele counts (loci x allele bins) over the individuals in `rows`.
	# WARNING: works only for diploid species (two columns per locus).
	count_alleles <- function(rows)
	{
		counts <- matrix(0, nloc/2, nbins)
		for (l in seq_along(loc_all_list))
		{
			copies <- geno[rows, c(2*l - 1, 2*l)]
			counts[l, ] <- tabulate(copies + 1, nbins = nbins)
		}
		counts
	}

	# x[-idx] selects NOTHING when idx is empty; in that case nothing has to
	# be removed and x must be returned whole.
	without <- function(x, idx)
	{
		if (length(idx) == 0) x else x[-idx]
	}

	# Total diversity: all individuals pooled
	Ht <- h_from_eff(count_alleles(seq_len(nind)))

	# Within-population diversity: one column per population, in `pops` order
	all_H2 <- matrix(0, nloc/2, length(pops))
	for (p in seq_along(pops))
	{
		members <- which(popIndex[seq_len(nind)] == pops[p])
		all_H2[, p] <- h_from_eff(count_alleles(members))
	}

	print("all_H2")
	print(all_H2)
	print("Ht")
	print(Ht)

	# Number of loci whose Ht is null (their Gst is NaN and is therefore
	# ignored by the na.rm = TRUE means below)
	print("Ht==0")
	print(sum(Ht == 0, na.rm = TRUE))

	Hs <- apply(all_H2, 1, mean, na.rm = TRUE)
	print("Hs")
	print(Hs)

	# Time spent in the computations above
	print(paste("PT",proc.time() - ptm))

	#Gst for all locus
	Gst <- (Ht - Hs)/Ht

	print(argSpecies)

	globalSelected <- c()

	for (i in names(argSelected))
	{
		sel <- argSelected[[i]]

		print(paste(i, "-> Mean Hs qtl : "))
		print(mean(Hs[sel], na.rm = TRUE))
		print(paste(i, "-> Mean Ht qtl: "))
		print(mean(Ht[sel], na.rm = TRUE))

		print(paste(i, "-> Ht qtl: "))
		print(Ht[sel])

		print(paste(i, "-> Mean Gstq : "))
		print(mean(Gst[sel], na.rm = TRUE))
		print(paste(i, "-> Mean Gst : "))
		print(mean(without(Gst, sel), na.rm = TRUE))

		globalSelected <- c(globalSelected, sel)
	}

	for (i in names(argLtSelected))
	{
		for (j in names(argLtSelected[[i]]))
		{
			sel <- argLtSelected[[i]][[j]]

			print(paste(i, "-> Mean Hs qtl", j, ": "))
			print(mean(Hs[sel], na.rm = TRUE))
			print(paste(i, "-> Mean Ht qtl", j, ": "))
			print(mean(Ht[sel], na.rm = TRUE))

			print(paste(i, "-> Ht qtl", j, ": "))
			print(Ht[sel])

			print(paste(i, "-> Mean Gstq", j, ": "))
			print(mean(Gst[sel], na.rm = TRUE))
		}
	}

	print("Global Selected: ")
	print(globalSelected)

	# Statistics over the loci that belong to none of the argSelected sets
	print("Hs : ")
	print(without(Hs, globalSelected))
	print("Mean Hs : ")
	print(mean(without(Hs, globalSelected), na.rm = TRUE))

	print("Ht : ")
	print(without(Ht, globalSelected))
	print("Mean Ht : ")
	print(mean(without(Ht, globalSelected), na.rm = TRUE))

	print("Gst : ")
	print(without(Gst, globalSelected))
	print("Mean Gst : ")
	print(mean(without(Gst, globalSelected), na.rm = TRUE))

	return(Ht)
}


# Run the analysis on the genotype matrix; the returned Ht vector is kept
htv2 <- calc_h_oneshot_BIS(genomes)
