# UPGMA implementation with parameters from UI
# args[1] = the path of the current experiment
# args[2] = the name of the input file
# args[3] = the name of the output file
# args[4] = success message
# args[5] = what experiment times to use (TR, RA, TR and RA)
# args[6] = separator of data in the source file
# args[7] = type of output (cluster print or heatmap)
# args[8] = output format (SVG, JPG, PNG)
# args[9] = normalization Method (None, Division, Log)
# args[10] = normalization place (After reading data - before clustering, After clustering - before displaying)
# args[11] = color for heatmap (green to red, yellow to blue)

# args[12] = type of dendrogram cutting (height, num clusters, none)
# args[13] = height at which the dendrogram will be cut/ num clusters to be left after cutting dendro;
#         This value is ignored if args[12] has the index for 'none'
# args[14] = the agglomeration method to be used by hclust.
#          This should be (an unambiguous abbreviation of) one of 
#         "ward", "single", "complete", "average", "mcquitty", "median" or "centroid".
# args[15] = the distance measure to be used by Dist.
#         This must be one of "euclidean", "maximum", "manhattan", "canberra", 
#         "binary", "pearson", "abspearson", "correlation", "abscorrelation",
#         "spearman" or "kendall". Any unambiguous substring can be given
#params: method for hclust, method for Dist, 
# sink("myfile.txt") # redirect console output to a file
# load lib files from your path
#.libPaths('/home/andrei/R/x86_64-pc-linux-gnu-library/2.14/')
.libPaths('lib/linux/')
library("amap")
library("RSvgDevice")
library("phangorn")
library("gplots")
require(graphics); require(grDevices)
library(maptree)

## load the common part of the algorithms ##
source('/home/andrei/univ/master/eclipse_workspace/PFC/src/pfc/protected/commands/shell/r/common.R')
#source("/home/andrei/sitio/PFC/src/pfc/protected/commands/shell/r/common.R")


## Save both files describing a clustering result: the CSV file with the mean profile of
## each cluster (one row per cluster, one column per experiment time) and the details
## file with one line per cluster (cluster name, number of genes, list of genes).
##
## clustData      - data frame (or numeric matrix) containing the data used for clustering,
##                  one row per gene; the row names are used as gene names
## geneMembership - the cluster index (1..numClusters) of every gene, as returned by the
##                  group.clust method. THE ENTRIES ARE ASSUMED TO BE IN THE SAME ORDER AS
##                  THE ROWS OF clustData, E.G. THE FIRST ENTRY BELONGS TO THE FIRST ROW
## numClusters    - the number of clusters obtained after cutting the dendrogram, either by
##                  height or by final number of clusters
## fileNameFull   - the full file path (including directories) without extension; the
##                  extensions are added in this function
## sep            - the separator placed between the fields of each line of the details file
## @return The matrix (numClusters x ncol(clustData)) with the mean values, for each
##         experiment time, of the genes in each cluster. The row of a cluster without
##         genes is NaN (0 / 0).
##
## NOTE: this function reads names that are not defined in it: .FILE_CLUSTER_DETAILS_EXT
## and saveAsCsv (presumably provided by common.R -- confirm) and the global 'args'
## (args[5] is forwarded to saveAsCsv), so 'args' must exist before it is called.
saveClusterFiles <- function(clustData, geneMembership, numClusters, fileNameFull, sep = ';') {
  ## seq_len() instead of 1:n so that empty input does not iterate over c(1, 0)
  clusterIds <- seq_len(numClusters)
  clusterNames <- sprintf('Cluster%d', clusterIds)
  membership <- as.integer(geneMembership)
  geneNames <- rownames(clustData)
  ## work on a matrix: indexing a data frame cell by cell is needlessly slow
  values <- as.matrix(clustData)

  numCols <- length(colnames(clustData))
  print('num cols: ');print(numCols)

  ## number of genes in each cluster (0 for the clusters without genes)
  clusterDims <- tabulate(membership, nbins = numClusters)

  resultMatrix <- matrix(0, nrow = numClusters, ncol = ncol(clustData),
                         dimnames = list(clusterNames, colnames(clustData)))
  listOfGenes <- character(numClusters)
  for (cluster in clusterIds) {
    members <- which(membership == cluster)
    ## details line: <cluster name><sep><num genes>[<sep><gene>...]
    listOfGenes[cluster] <- paste(c(clusterNames[cluster], clusterDims[cluster], geneNames[members]),
                                  collapse = sep)
    ## arithmetic mean of every column over the genes of the cluster
    resultMatrix[cluster, ] <- colSums(values[members, , drop = FALSE]) / clusterDims[cluster]
  }

  ## write the cluster details file, one cluster per line
  write(listOfGenes, file = paste(sep = '', fileNameFull, .FILE_CLUSTER_DETAILS_EXT), sep = '\n',
        ncolumns = 1)
  ## save the means for each experiment time, each row being a cluster
  saveAsCsv(paste(sep = '', fileNameFull, '.csv'), resultMatrix, args[5])
  return(resultMatrix)
}


## ---------------------------------------------------------------------------
## Main script: read the arguments, cluster the data and render the output.
## The helpers and the dotted constants (.VOUT_*, .CUT_DENDRO_*, .NORM_*, ...) used
## below are not defined in this file (presumably they come from common.R).
## ---------------------------------------------------------------------------

## Command line arguments; the meaning of each position is described in the header of
## this file. NOTE: saveClusterFiles reads the global 'args' too.
args <- commandArgs(TRUE)
## Sample values for running the script by hand:
# args[1] <- "/home/andrei/univ/master/eclipse_workspace/PFC/docs/data_of_pfc/"
# args[2] <- "upgma_err.csv"
# args[3] <- "result-upgma"
# args[4] <- "succ"
# args[5] <- .EXP_TIME_POINT
# args[6] <- ";"
# args[7] <- .VOUT_TYPE_DENDROGRAM
# args[8] <- .VOUT_FORMAT_SVG
# args[9] <- .NORM_METHOD_DIV
# args[10] <- .NORM_METHOD_WHEN_AFT_CL
# args[11] <- .HEATMAP_COL_GREEN_RED
#
# args[12] <- .CUT_DENDRO_NUM_CLUSTERS
# args[13] <- 23
# args[14] <- 'mcquitty'
# args[15] <- 'euclidean'

data <- prepareInput(args[1], args[2], args[5], args[6], args[9], args[10])

## Hierarchical clustering: args[15] is the distance measure, args[14] the agglomeration method
hcInit <- hclust(Dist(data, args[15]), args[14])

## Full path of the result files, without extension
outputBase <- paste(sep = '', args[1], args[3])

## If the result is a dendrogram, prepare the output for the genes' profiles
if (args[7] == .VOUT_TYPE_DENDROGRAM) {
  if (args[12] == .CUT_DENDRO_HEIGHT) {
    cutHeight <- as.numeric(args[13])
    ## cut to create a dendrogram
    hcClipped <- clip.clust(hcInit, data = data, h = cutHeight)
    ## save the cluster composition (the membership is computed on the uncut tree)
    resultMatrix <- saveClusterFiles(data, group.clust(hcInit, h = cutHeight),
                                     length(hcClipped$order), outputBase, args[6])
    hcInit <- hcClipped
    ## create the dendrogram
    dendroHc <- applyDendroLabels(as.dendrogram(hcInit),
                                  labels = paste("Cluster", seq_along(hcInit$labels), sep = ""))
  } else if (args[12] == .CUT_DENDRO_NUM_CLUSTERS) {
    ## parse the number of clusters once instead of relying on implicit coercion
    numClusters <- as.numeric(args[13])
    ## save the cluster composition
    resultMatrix <- saveClusterFiles(data, group.clust(hcInit, k = numClusters),
                                     numClusters, outputBase, args[6])
    ## re-cut to create a dendrogram
    hcInit <- clip.clust(hcInit, data = data, k = numClusters)
    ## create the dendrogram
    dendroHc <- applyDendroLabels(as.dendrogram(hcInit),
                                  labels = paste("Cluster", seq_len(numClusters), sep = ""))
  } else if (args[12] == .CUT_DENDRO_NONE) {
    ## create the dendrogram
    dendroHc <- as.dendrogram(hcInit)
    resultMatrix <- data
  } else {
    ## fail with a clear message instead of a later "object 'resultMatrix' not found"
    stop("Unknown type of dendrogram cutting: ", args[12], call. = FALSE)
  }
  ## Generate the area for the plotting action
  print(paste("rows matrix res = ", nrow(resultMatrix)))
  matrixSettingsPlot <- createMatrixPlot(args[5], nrow(resultMatrix))
  ## In inches
  plotHeight <- (nrow(matrixSettingsPlot) + 1) * 1.5
} else {
  resultMatrix <- data
  ## The default value when the plot is not a dendrogram, in inches
  plotHeight <- ceiling(nrow(resultMatrix) / .HEATMAP_NUM_GENES_INCH)
}

## Open the graphics device; args[8] is appended to the file name as its extension
plotFile <- paste(sep = '', outputBase, args[8])
if (args[8] == .VOUT_FORMAT_SVG) {
  devSVG(file = plotFile, width = 10, height = plotHeight)
} else if (args[8] == .VOUT_FORMAT_JPG) {
  jpeg(filename = plotFile, width = 7, height = plotHeight, units = 'in', res = 100)
} else {
  png(filename = plotFile, width = 7, height = plotHeight, units = 'in', res = 100)
}

## normalize data before displaying
if (args[10] == .NORM_METHOD_WHEN_AFT_CL) {
  ## the case in which we have 2 types of columns needs special attention since it is
  ## required to filter each type of experiment independently
  if (args[5] == .EXP_TIMES_TR_RA) {
    ## number of columns whose name matches the RA pattern; used as the index of the
    ## last column of the first group (assumes the columns are grouped that way -- confirm)
    firstColOtherExp <- length(names(data)[grep(.RA_PATTERN, toupper(names(data)))])
    resultMatrix <- normFunction(resultMatrix, args[9], 1, firstColOtherExp)
    resultMatrix <- normFunction(resultMatrix, args[9], firstColOtherExp + 1, ncol(data))
  } else {
    resultMatrix <- normFunction(resultMatrix, args[9], 1, ncol(resultMatrix))
  }
}

if (args[7] == .VOUT_TYPE_DENDROGRAM) {
  plotDendroProfiles(data, args[5], resultMatrix, matrixSettingsPlot, dendroHc)
} else if (args[7] == .VOUT_TYPE_HEATMAP) {
  if (args[11] == .HEATMAP_COL_GREEN_RED) {
    colorMapU <- greenred(.HEATMAP_NUM_COLS_INTERVAL)
  } else if (args[11] == .HEATMAP_COL_YELLOW_BLUE) {
    colorMapU <- colorpanel(.HEATMAP_NUM_COLS_INTERVAL, 'yellow', 'black', 'blue')
  } else {
    ## fail with a clear message instead of "object 'colorMapU' not found"
    stop("Unknown heatmap color: ", args[11], call. = FALSE)
  }
  ## the rows are ordered by the precomputed clustering, the columns keep their order
  ## and no dendrogram is drawn
  heatmap.2(data.matrix(resultMatrix),
            distfun = FALSE,
            Colv = FALSE,
            Rowv = as.dendrogram(hcInit),
            dendrogram = "none",
            hclustfun = FALSE,
            trace = 'none',
            na.color = 'black',
            col = colorMapU)
}

dev.off()
## If the algorithm will print this it means it could run all the functions thus the
## execution is successful
print(args[4])

#sink() # restore output to the screen

#sink() # restore output to the screen
