#library("ctc")

## Codes selecting which experiment-time columns of the input are used
.EXP_TIMES_TR <- 1001
.EXP_TIMES_RA <- 1002
.EXP_TIMES_TR_RA <- 1003
.EXP_TIME_POINT <- 1004

## Codes for the type of visual output
.VOUT_TYPE_HEATMAP <- 1101
.VOUT_TYPE_DENDROGRAM <- 1102
.VOUT_TYPE_CLUSTER <- 1103

## File extensions of the supported image formats
.VOUT_FORMAT_SVG <- '.svg'
.VOUT_FORMAT_JPG <- '.jpg'
.VOUT_FORMAT_PNG <- '.png'

## Regular expressions matched against the (upper-cased) column headers
.TR_PATTERN <- '^TR.{1}[0-9]+'
.RA_PATTERN <- '^RA.{1}[0-9]+'
.TIME_POINT_PATTERN <- '^K.\\([0-9\\.]+\\){1}'

## Codes describing how a dendrogram is cut: by height, by number of clusters, or not at all
.CUT_DENDRO_HEIGHT <- 2001
.CUT_DENDRO_NUM_CLUSTERS <- 2002
.CUT_DENDRO_NONE <- 2003

## @var int Leave the column values as they are (no normalization)
.NORM_METHOD_NONE <- 1201
## @var int Divide every column by the first one (the first column itself becomes 1.00)
.NORM_METHOD_DIV <- 1202
## @var int Divide every column by the first one and then take the log2 of the result
.NORM_METHOD_LOG_DIV <- 1203
## @var int Take the log2 of the unmodified values of every column
.NORM_METHOD_LOG <- 1204

## @var int Normalize right after reading the file, before clustering and displaying
## (the normalized data is what gets clustered and then displayed)
.NORM_METHOD_WHEN_BEF_CL <- 1301
## @var int Normalize after reading the file and after clustering, but before displaying
.NORM_METHOD_WHEN_AFT_CL <- 1302

## Number of colors between the two base colors of a heatmap (e.g. green lowest -> red highest)
.HEATMAP_NUM_COLS_INTERVAL <- 75

## Number of genes per inch in a heatmap
.HEATMAP_NUM_GENES_INCH <- 10

## Heatmap palette going from green (lowest values) to red (highest values)
.HEATMAP_COL_GREEN_RED <- 1401
## Heatmap palette going from yellow (lowest values) to blue (highest values)
.HEATMAP_COL_YELLOW_BLUE <- 1402

## Extension of the file containing the cluster details
.FILE_CLUSTER_DETAILS_EXT <- '.clu'

## Maximum number of rows of the layout matrix used for plotting
.LIMIT_NUM_ROWS_MAT_PLOT <- 50
## Number of layout rows allocated to the dendrogram when profiles are printed below it
.NUM_ROWS_DENDRO <- 3

## Read the data from the input file and keep only the columns required by the
## requested experiment type, optionally normalizing them before clustering.
##
## path, fileName  - pasted together as-is (no separator is inserted) to locate the file
## experimentTimes - one of .EXP_TIMES_TR, .EXP_TIMES_RA, .EXP_TIMES_TR_RA, .EXP_TIME_POINT;
##                   any other value leaves all columns in place and skips normalization
## separator       - field separator of the input file (decimal mark is always '.')
## normMethod      - one of the .NORM_METHOD_* codes, forwarded to normFunction
## normPlace       - normalization runs here only when equal to .NORM_METHOD_WHEN_BEF_CL
##
## Returns the prepared data with its headers cleaned up by renameHeaders. The
## result stays a data frame even when a single column matches the pattern.
prepareInput <-
  function(path, fileName, experimentTimes, separator, normMethod, normPlace) {
    data <- read.csv2(file = paste0(path, fileName), sep = separator,
                      na.strings = "", header = TRUE, dec = '.')
    ## The first column holds the row identifiers: use it as row names, then drop it
    rownames(data) <- data[, 1]
    data <- data[, -1, drop = FALSE]
    ## Rows with a missing value in any remaining column are discarded
    ## (this happens before the TR/RA column selection below)
    data <- na.omit(data)

    if (experimentTimes == .EXP_TIMES_TR) {
      print('exptimes tr')
      ## drop = FALSE: a single matching column must not collapse into a bare vector
      data <- data[, grep(.TR_PATTERN, toupper(names(data))), drop = FALSE]
      if (normPlace == .NORM_METHOD_WHEN_BEF_CL) {
        data <- normFunction(data, normMethod, 1, ncol(data))
      }
    } else if (experimentTimes == .EXP_TIMES_RA) {
      print('exptimes ra')
      data <- data[, grep(.RA_PATTERN, toupper(names(data))), drop = FALSE]
      if (normPlace == .NORM_METHOD_WHEN_BEF_CL) {
        data <- normFunction(data, normMethod, 1, ncol(data))
      }
    } else if (experimentTimes == .EXP_TIMES_TR_RA) {
      print('exptimes tr ra')
      ## Two experiment types share the table; each one is normalized against its
      ## own first column. The boundary is taken as the number of RA columns.
      ## NOTE(review): this is only the true boundary when both types have the same
      ## number of columns (or RA comes first) and no other columns are present.
      firstColOtherExp <- length(grep(.RA_PATTERN, toupper(names(data))))
      if (normPlace == .NORM_METHOD_WHEN_BEF_CL) {
        data <- normFunction(data, normMethod, 1, firstColOtherExp)
        data <- normFunction(data, normMethod, firstColOtherExp + 1, ncol(data))
      }
    } else if (experimentTimes == .EXP_TIME_POINT) {
      if (normPlace == .NORM_METHOD_WHEN_BEF_CL) {
        data <- normFunction(data, normMethod, 1, ncol(data))
      }
    }

    renameHeaders(experimentTimes, data)
  }

## Undo the dot substitutions R applies to column headers while reading a file.
## For .EXP_TIME_POINT the first dot becomes '(' and a trailing dot becomes ')';
## for every other experiment type the first run of dots becomes a single space.
## Returns data with the rewritten column names.
renameHeaders <- function(experimentTimes, data) 
{
  headers <- colnames(data)
  if (experimentTimes == .EXP_TIME_POINT) {
    withOpening <- sub('\\.', '\\(', headers)
    cleaned <- sub('\\.$', '\\)', withOpening)
  } else {
    cleaned <- sub('\\.+', ' ', headers)
  }
  colnames(data) <- cleaned
  data
}

## Apply the normalization requested by the user to a range of columns.
##
## matrixData   - table whose columns normColIndex..lastColIndex are normalized
## normMethod   - .NORM_METHOD_DIV:     divide the range by its first column (which becomes 1)
##                .NORM_METHOD_LOG_DIV: log2 of that ratio (the first column becomes 0)
##                .NORM_METHOD_LOG:     log2 of every column in the range
##                any other value leaves matrixData untouched
## normColIndex - index of the first column of the range (the reference column)
## lastColIndex - index of the last column of the range
##
## Returns the (possibly) modified matrixData. Columns outside the range are never
## touched, also when the range consists of the reference column only.
normFunction <- function(matrixData, normMethod, normColIndex, lastColIndex) 
{
  ## Values of one column as doubles (as.vector first so factor columns convert
  ## through their labels rather than their internal codes)
  columnAsDouble <- function(index) {
    as.double(as.vector(matrixData[, index]))
  }
  ## Columns following the reference column; empty for a single-column range.
  ## (A plain start:lastColIndex would count downwards in that case.)
  followers <- normColIndex + seq_len(max(lastColIndex - normColIndex, 0))

  if (normMethod == .NORM_METHOD_DIV) {
    print('exptimes div')
    reference <- columnAsDouble(normColIndex)
    for (i in followers) {
      matrixData[, i] <- columnAsDouble(i) / reference
    }
    matrixData[, normColIndex] <- 1.0
  } else if (normMethod == .NORM_METHOD_LOG_DIV) {
    print('exptimes log div')
    reference <- columnAsDouble(normColIndex)
    for (i in followers) {
      matrixData[, i] <- log2(columnAsDouble(i) / reference)
    }
    matrixData[, normColIndex] <- 0.0
  } else if (normMethod == .NORM_METHOD_LOG) {
    print('exptimes log')
    for (i in c(normColIndex, followers)) {
      matrixData[, i] <- log2(columnAsDouble(i))
    }
  }
  matrixData
}

## Cut a hierarchical clustering and return it as a dendrogram with generic leaf labels.
##
## hclustObj - the clustering to cut
## dataObj   - the clustered data set, forwarded to clip.clust as its data argument
## typeCut   - .CUT_DENDRO_HEIGHT (cut at height cutValue) or
##             .CUT_DENDRO_NUM_CLUSTERS (cut into cutValue clusters);
##             for any other value hclustObj is returned unchanged
## cutValue  - the height or the number of clusters, depending on typeCut
##
## Returns a dendrogram whose leaves are labelled "Cluster1", "Cluster2", ... when a
## cut was requested. The function works on its own arguments only; it no longer
## reads the globals data, hcInit and args.
## NOTE(review): clip.clust is not defined in this file - presumably maptree::clip.clust.
getDendro <- function(hclustObj, dataObj, typeCut, cutValue) {
  if (typeCut == .CUT_DENDRO_HEIGHT) {
    clipped <- clip.clust(hclustObj, data = dataObj, h = cutValue)
  } else if (typeCut == .CUT_DENDRO_NUM_CLUSTERS) {
    clipped <- clip.clust(hclustObj, data = dataObj, k = cutValue)
  } else {
    return(hclustObj)
  }
  dendro <- as.dendrogram(clipped)
  ## One label per remaining leaf, so the labels always fit the cut tree
  ## regardless of how the cut was specified
  clusterLabels <- paste0("Cluster", seq_len(attr(dendro, "members")))
  applyDendroLabels(dendro, labels = clusterLabels)
}

## Apply a list of labels to the leaves of a (cut) dendrogram.
##
## dendroObj - the dendrogram to relabel
## labels    - the new labels; the i-th leaf visited by dendrapply gets labels[i]
##
## Returns the relabelled dendrogram. The leaf counter lives in this call's own
## frame, so nothing is written to the global environment.
applyDendroLabels <- function(dendroObj, labels) {
  leafIndex <- 0L
  relabelLeaf <- function(node) {
    if (is.leaf(node)) {
      ## <<- updates the counter of the enclosing call, shared by all nodes
      leafIndex <<- leafIndex + 1L
      attr(node, "label") <- labels[leafIndex]
    }
    node
  }
  dendrapply(dendroObj, relabelLeaf)
}

## Write the clustering hc, converted by hc2Newick, to the file
## <path><fName>.newick (the parts are pasted together without a separator).
saveAsNewick <- function(path, fName, hc) {
  newickFile <- paste0(path, fName, '.newick')
  newickText <- hc2Newick(hc)
  write(newickText, file = newickFile)
}

## Write a table to a delimited text file, row names included and without quoting.
##
## fullFpath - full path of the file to write
## data      - the table to save; its row names become the first column and the
##             header cell above them is left blank
## expTimes  - currently unused; kept so existing callers keep working
## separator - field separator; defaults to the sixth element of the global args
##             vector, which is what this function always used before
saveAsCsv <- function(fullFpath, data, expTimes, separator = args[6]) {
  write.table(data, fullFpath,
              sep = separator,
              row.names = rownames(data),
              col.names = NA,
              quote = FALSE)
}

## Plots a dendrogram followed by a list of genes' profiles.
## For .EXP_TIMES_TR_RA every row of resultMatrix is split into two display rows (the first
## firstColOtherExp columns, then the remaining ones) and the resulting matrix is handed to
## plotDP; for any other expTimes resultMatrix is handed to plotDP unchanged.
## data - Input data used for clustering; only its column names are read here
## expTimes - the type of times present in the data and used for clustering
## resultMatrix - the result matrix with all the data which will be displayed line by line
## matrixSettingsPlot - The settings for a plot, namely a parameter containing the graphical arrangement of the images (dendrogram and profiles)
## dendroObj - The object which will be displayed as a dendrogram
plotDendroProfiles <- function(data, expTimes, resultMatrix, matrixSettingsPlot, dendroObj) {
  if (expTimes == .EXP_TIMES_TR_RA) {
    print('plotDendroProfiles***************************************')
    ## Positions of the columns whose upper-cased name matches the RA pattern
    locPatterns <- grep(.RA_PATTERN, toupper(names(data)))
    print(length(locPatterns))
    print(names(data))
    ## Number of RA columns; used below as the width of the first column group
    firstColOtherExp <- length(names(data)[locPatterns])
    ## NOTE(review): c(n*2) is a length-one vector holding the number n*2, not a vector of
    ## that length; the row-name vector is grown by the assignments inside the loop below
    rowNamesArr <- c(firstColOtherExp*2)
    rowNamesOrig <- rownames(resultMatrix)
    ## Number of layout cells below the rows reserved for the dendrogram
    numRowsDisplayMat <- (nrow(matrixSettingsPlot) - .NUM_ROWS_DENDRO) * ncol(matrixSettingsPlot)
    rowNamesOrig <- rowNamesOrig[1:(numRowsDisplayMat)]
    ## Init the display matrix; it is seeded with the row names and overwritten row by row below
    ## NOTE(review): seeding with names presumably makes this a character matrix, so the
    ## numeric rows assigned below would be coerced to text - confirm plotDP copes with that
    displayMat <- matrix(c(rowNamesOrig),
                         nrow = numRowsDisplayMat, ncol = firstColOtherExp
                         )
    numExpTimesTreeMat <- ncol(resultMatrix)
    
    ## Check if either RA or TR is the first type of times and set the drawings' names accordingly (they will be added at the end of each cluster/gene name)
    ## NOTE(review): grep() returns 1-based positions, so this condition is TRUE whenever at
    ## least one column matched (and the if fails on NA when none did); the else branch
    ## looks unreachable - confirm the intended check
    if (locPatterns[1] > 0) {
      firstCol <- ' - TR'
      secCol <- '- RA'
    } else {      
      firstCol <- ' - RA'
      secCol <- '- TR'
    }
    
    ## Loop over the elements in the result matrix: the first column group goes to an odd
    ## display row and the second column group to the even display row right after it
    resultMatrix <- as.matrix(resultMatrix)
    for (index in seq(1, numRowsDisplayMat, 2)) {
      ## Display rows 1, 3, 5, ... map to result rows 1, 2, 3, ...
      indexSmall <- as.integer(index/2)+1
      rowNamesArr[index] <- paste(sep=' ', rowNamesOrig[indexSmall], firstCol)
      displayMat[index, ] <- as.vector(resultMatrix[indexSmall, 1:firstColOtherExp])
      ## NOTE(review): index+1 exceeds the matrix when numRowsDisplayMat is odd, and the
      ## second group must have exactly firstColOtherExp columns to fit - confirm with callers
      rowNamesArr[index+1] <- paste(sep=' ', rowNamesOrig[indexSmall], secCol)
      displayMat[index+1, ] <- resultMatrix[indexSmall, (firstColOtherExp+1):numExpTimesTreeMat]
    }
    rownames(displayMat) <- rowNamesArr
    
    plotDP(displayMat, matrixSettingsPlot, dendroObj, 
           ## X axis labels: the column names of the first group with every non-digit removed
           as.numeric(gsub("\\D", "", colnames(resultMatrix[, 1:firstColOtherExp]))))
    
  } else {
    plotDP(resultMatrix, matrixSettingsPlot, dendroObj,
           ## X axis labels: the column names with every non-digit removed
           as.numeric(gsub("\\D", "", colnames(resultMatrix))))
  }
  
}

## Plots a dendrogram followed by a list of genes' profiles
## matrixPlot - the matrix holding the profiles; every row is drawn as one line plot
##              titled with the row's name
## matrixSettingsPlot - the layout matrix passed to layout(): figure 1 is the dendrogram,
##              the following figures are the profiles
## dendroObj - The object which will be displayed as a dendrogram
## xLabelsArr - the labels for the X axis (one per column of matrixPlot)
##
## Called for its plotting side effects; returns NULL invisibly. When matrixPlot has
## no rows, only the dendrogram is drawn.
plotDP <- function(matrixPlot, matrixSettingsPlot, dendroObj, xLabelsArr) {  
  layout(mat = matrixSettingsPlot)
  plot(dendroObj)

  ## The first 3 rows of the layout are taken by the dendrogram; every remaining
  ## cell can hold one profile
  dendroRows <- 3
  layoutCells <- (nrow(matrixSettingsPlot) - dendroRows) * ncol(matrixSettingsPlot)
  ## Blank cells at the end of the layout stay empty: never draw more profiles
  ## than there are rows to draw
  numFigs <- min(nrow(matrixPlot), layoutCells)

  numExpTimes <- ncol(matrixPlot)
  profileNames <- rownames(matrixPlot)
  matrixPlot <- as.matrix(matrixPlot)
  print(paste("num figs = ", numFigs))

  ## seq_len() yields an empty loop for zero (or negative) figure counts,
  ## where 1:numFigs would count downwards and index a missing row
  for (index in seq_len(max(numFigs, 0))) {
    profile <- as.vector(matrixPlot[index, ])
    plot.default(profile, type = 'b', xaxt = "n", main = profileNames[index],
                 ann = TRUE, xlab = 'Experiment Times', ylab = 'Values')
    ## The default X axis is suppressed above; draw one carrying the experiment times
    axis(side = 1, at = seq_len(numExpTimes), labels = xLabelsArr)
  }
  invisible(NULL)
}

## Generates the matrix used to divide the screen in as many regions as required to display
## both the dendrogram and the profiles (the matrix is meant to be passed to layout()).
## expTime - the type of times present in the data and used for clustering; for
##           .EXP_TIMES_TR_RA the profiles take 2 columns (one row per cluster), otherwise 3
## numClusters - number of clusters (or all genes determined by the algorithm)
##
## Returns a matrix whose first 3 rows are filled with 1 (the dendrogram) and whose
## remaining cells are numbered 2, 3, ... row by row. The matrix never has more than
## .LIMIT_NUM_ROWS_MAT_PLOT rows; clusters which do not fit get no cell.
createMatrixPlot <- function(expTime, numClusters) {  
  ## Number of layout rows reserved for the dendrogram
  dendroRows <- 3
  ## The decision is based on the expTime argument, not on the global args vector
  splitByType <- expTime == .EXP_TIMES_TR_RA

  if (splitByType) {
    plotProfCols <- 2
    plotProfLines <- numClusters + dendroRows
  } else {
    plotProfCols <- 3
    plotProfLines <- ceiling(numClusters / plotProfCols) + dendroRows
    print('createMatrixPlot********************************************')
    print(plotProfLines)
  }

  ## Reduce the number of rows if the layout would exceed the limit
  plotProfLines <- min(plotProfLines, .LIMIT_NUM_ROWS_MAT_PLOT)

  ## Every cell below the dendrogram gets its own figure number, blanks of the last
  ## row included, so the vector always holds exactly plotProfLines * plotProfCols
  ## values and matrix() never has to recycle or truncate
  numPanels <- max(plotProfLines - dendroRows, 0) * plotProfCols
  endLimit <- numPanels + 1
  layoutCells <- c(rep(1, dendroRows * plotProfCols), seq_len(numPanels) + 1)
  matrixPlot <- matrix(layoutCells, plotProfLines, plotProfCols, byrow = TRUE)

  if (!splitByType) {
    print(paste("end limit = ", endLimit))
    print(matrixPlot)
  }
  matrixPlot
}
