# This is the kmeans algorithm especially built for bioinformatics
# args[1] = the path of the current experiment
# args[2] = the name of the input file
# args[3] = the name of the output file
# args[4] = success message
# args[5] = what experiment times to use (TR; RA; TR and RA;Time point)
# args[6] = separator of data in the source file
# args[7] = type of output (cluster print or heatmap)
# args[8] = output format (SVG, JPG, PNG)
# args[9] = normalization Method (None, Division, Log)
# args[10] = normalization place (After reading data - before clustering, After clustering - before displaying)
# args[11] = color for heatmap (green to red, yellow to blue)

# args[12] = print the centers and correlation either in one big matrix or in 2 different ones one after another
# args[13] = centers: either the number of clusters or a set of initial centers
# args[14] = maximum number of iterations
# args[15] = the distance measure; one of these values:
#   "euclidean", "maximum", "manhattan", "canberra",
#   "binary", "pearson", "abspearson",
#   "abscorrelation", "correlation", "spearman" or
#   "kendall"
# args[16] = if centers is a single number, how many random sets should be chosen
.libPaths('lib/linux/')
require(graphics)
library("RSvgDevice")
library("amap")
library("gplots")
#library("Cairo")
require(grDevices)

## Layout codes for args[12] when both TR and RA times are present.
## SAME_MAT: draw one big matrix whose rows and columns are the merged TR and
## RA time arrays.
.KMEANS_CLUSTER_SAME_MAT <- 1501
## DIFF_MAT: draw two separate matrices, one for the TR times and one for the
## RA times, respectively.
.KMEANS_CLUSTER_DIFF_MAT <- 1502

## Load the part shared by all algorithm scripts (it is expected to provide
## prepareInput, normFunction, saveAsCsv and the dotted constants used below).
##
## The historical absolute path is tried first, so an installation where it
## exists behaves exactly as before. When it is absent, the script no longer
## fails outright: it looks for a common.R next to this script (located through
## the --file= argument that Rscript passes) and then in the working directory.
commonCandidates <- "/home/andrei/univ/master/eclipse_workspace/PFC/src/pfc/protected/commands/shell/r/common.R"
scriptFileArg <- grep("^--file=", commandArgs(FALSE), value = TRUE)
if (length(scriptFileArg) > 0) {
  ## R encodes spaces in the --file= value as "~+~"
  scriptPath <- gsub("~+~", " ", sub("^--file=", "", scriptFileArg[1]),
                     fixed = TRUE)
  commonCandidates <- c(commonCandidates,
                        file.path(dirname(scriptPath), "common.R"))
}
commonCandidates <- c(commonCandidates, "common.R")
commonFound <- commonCandidates[file.exists(commonCandidates)]
if (length(commonFound) == 0) {
  stop("Cannot locate common.R; looked in: ",
       paste(commonCandidates, collapse = ", "), call. = FALSE)
}
source(commonFound[1])

## Only the trailing (user supplied) command line arguments
args <- commandArgs(TRUE)
# a 2-dimensional example
#data <- rbind(matrix(rnorm(100, sd = 0.3), ncol = 2),
#          matrix(rnorm(100, mean = 1, sd = 0.3), ncol = 2))
#colnames(x) <- c("x", "y")
#(cl <- kmeans(x, 5))
# args[1] <- "/home/andrei/univ/master/eclipse_workspace/PFC/docs/data_of_pfc/"
# args[2] <- "kmeans_err-stabilogen.csv"
# args[3] <- "result_kmeans"
# args[4] <- "succ"
# args[5] <- .EXP_TIME_POINT
# args[6] <- ";"
# args[7] <- .VOUT_TYPE_CLUSTER
# args[8] <- .VOUT_FORMAT_SVG
# args[9] <- .NORM_METHOD_NONE
# args[10] <- .NORM_METHOD_WHEN_AFT_CL
# args[11] <- .HEATMAP_COL_GREEN_RED
# 
# args[12] <- .KMEANS_CLUSTER_DIFF_MAT
# args[13] <- 10
# args[14] <- 10
# args[15] <- "euclidean"
# args[16] <- 4

## Read the input file and get it ready for clustering. The arguments are, in
## order: experiment path, input file name, experiment times selector, field
## separator, normalization method and normalization place.
data <- prepareInput(
  args[1], args[2], args[5], args[6], args[9], args[10]
)
#print(args[9])


# getPearsonCorrelation <- function(dataPC, startTime, endTime) {
#   numGenPC <- nrow(dataPC)
#   for (i in startTime:endTime)
#     for (j in startTime:endTime)
#       print(cor.test(dataPC[1:numGenPC, i], dataPC[1:numGenPC, j],method='pearson'))
# }

#dev.off()
## Run Kmeans on the rows of the input that have no missing values.
## args[13] either holds initial centers joined by '_' or a plain number of
## clusters; only the latter uses the number of random starts in args[16].
clusterInput <- na.omit(data)
maxIterations <- as.numeric(args[14])
if (!grepl("_", args[13])) {
  ## no '_' present: args[13] is the number of centers, chosen at random
  print('set the number of random choosen centers')
  cl <- Kmeans(clusterInput,
               centers = as.numeric(args[13]),
               iter.max = maxIterations,
               method = args[15],
               nstart = as.numeric(args[16]))
} else {
  ## centers supplied by the user: split them on '_' into a one-row matrix
  kCenters <- t(unlist(strsplit(args[13], split = '_', fixed = TRUE)))
  cl <- Kmeans(clusterInput,
               centers = kCenters,
               iter.max = maxIterations,
               method = args[15])
}

## Name the cluster centers Cluster1..ClusterK and write two files:
##   * the cluster details file: one line per cluster of the form
##     Cluster<k><sep><size><sep><member 1><sep><member 2>...
##     where <sep> is args[6] and the members are the names carried by
##     cl$cluster, in their original order;
##   * a csv with the centers themselves (written by saveAsCsv).
## The lines are built per cluster with split()/vapply() instead of appending
## to strings one observation at a time, and without relying on assignments
## hidden inside paste() arguments.
index <- seq_along(cl$size)
result <- cl$centers
rownames(result) <- paste0('Cluster', index)

lenCluster <- length(cl$cluster)
colNCluster <- names(cl$cluster)
## Without names every member still contributes an (empty) field, so the
## number of separators on a line keeps reflecting the cluster size.
memberNames <- if (is.null(colNCluster)) rep('', lenCluster) else colNCluster
## levels = index keeps empty clusters and guarantees positional lookup
membersByCluster <- split(memberNames, factor(cl$cluster, levels = index))

clusterHeader <- paste0('Cluster', index, args[6], cl$size[index])
clusterString <- vapply(
  index,
  function(k) paste(c(clusterHeader[k], membersByCluster[[k]]),
                    collapse = args[6]),
  character(1)
)

## save the clusters details in a file
write(clusterString,
      file = paste0(args[1], args[3], .FILE_CLUSTER_DETAILS_EXT),
      ncolumns = 1, sep = '\n')
## save the resulted clusters in a csv
saveAsCsv(paste0(args[1], args[3], '.csv'), result, args[5])


## Normalize the cluster centers just before displaying them, when the user
## asked for normalization to happen after clustering.
if (args[10] == .NORM_METHOD_WHEN_AFT_CL) {
  if (args[5] != .EXP_TIMES_TR_RA) {
    ## a single kind of columns: normalize all of them in one go
    result <- normFunction(result, args[9], 1, ncol(result))
  } else {
    ## Two kinds of columns need special attention: each type of experiment is
    ## normalized independently. The split point is the number of column names
    ## of the input that match .RA_PATTERN (case-insensitively).
    firstColOtherExp <- length(grep(.RA_PATTERN, toupper(names(data))))
    result <- normFunction(result, args[9], 1, firstColOtherExp)
    result <- normFunction(result, args[9], firstColOtherExp + 1, ncol(data))
  }
}
#print(result)
#cl <- kmeans(na.omit(data), 2)
## Full name (path + output name + format suffix) of the graphics file
fileRGrNameFull <- paste0(args[1], args[3], args[8])

## Height of the drawing, which depends on the kind of output:
##   * heatmap (anything that is not a cluster print): one unit per center;
##   * cluster print with TR and RA merged in one matrix: half of the columns;
##   * any other cluster print: one unit per column.
## One extra unit is always added.
if (args[7] != .VOUT_TYPE_CLUSTER) {
  plotHeight <- nrow(result) + 1
} else if (args[5] == .EXP_TIMES_TR_RA &&
           args[12] == .KMEANS_CLUSTER_SAME_MAT) {
  plotHeight <- ncol(result) / 2 + 1
} else {
  plotHeight <- ncol(result) + 1
}
print(plotHeight)
#png(filename="temp/clustering.png", width=500, height=500)

## Open the graphics device for the requested format and prepare the outer
## margins used by the two pairs() plots.
## oma = c(bottom, left, top, right): size of the outer margins in lines of
## text.
if (args[8] == .VOUT_FORMAT_SVG) {
  devSVG(file = fileRGrNameFull, width = 10, height = plotHeight)
  if (args[5] == .EXP_TIMES_TR_RA) {
    ## Plot height divided by the character height reported by par()$cin; it
    ## pushes the first plot up (big bottom margin) and the second one down
    ## (big top margin).
    halfDist <- plotHeight / par()$cin[2]
    print(par()$cin)
    omaLower <- c(2, 3, halfDist, 3)
    omaUpper <- c(halfDist, 3, 2, 3)
  }
} else {
  ## Raster output: JPG when asked for, PNG for everything else
  omaUpper <- c(2, 3, 2, 3)
  omaLower <- omaUpper
  openRaster <- if (args[8] == .VOUT_FORMAT_JPG) jpeg else png
  ## A cluster print of TR and RA data goes to numbered page files
  ## (<name>.temp.<page>); every other case draws straight into the final file.
  if (args[5] == .EXP_TIMES_TR_RA && args[7] == .VOUT_TYPE_CLUSTER) {
    rasterTarget <- paste(sep = '.', fileRGrNameFull, 'temp.%01d')
  } else {
    rasterTarget <- fileRGrNameFull
  }
  openRaster(file = rasterTarget,
             width = 7, height = plotHeight, units = 'in', res = 100)
}
#print(data)
#print(cl$cluster)
#print(cl$size)
#plot(cl)#data.centers, col = cl$cluster, main = "K Means tree")
#cl$cluster
## Diagnostics: character size of the open device and how many character
## heights fit in the plot height (full and half)
charSize <- par()$cin
print("lh")
print(charSize)
print(plotHeight / charSize[2])
print((plotHeight / charSize[2]) / 2)
#plot(cl , col=cl$cluster, xlab="", ylab="",);
if (args[7] == .VOUT_TYPE_CLUSTER) {
  #data <- as.numeric(as.matrix(na.omit(data)))
  #mydist <- as.dist(1-cor(t(data), method='pearson'))
  #print(mydist)
  #mds = cmdscale(mydist); 
  #mycol <- as.vector(cl$cluster)
  #print(mycol)
  #mycol <- rainbow(length(unique(mycol)), start=0.1, end=0.5)[mycol] # color selection steps
  #plot(mds[,1], mds[,2], pch=20, col=mycol, xlab="", ylab="")
  #plot(data, col = result)
  #print(result)
  colors = rainbow(nrow(result))#[result]
  
  ## Panel function for pairs(): writes the absolute Pearson correlation of the
  ## two variables in the middle of the panel.
  ##
  ## x, y     numeric vectors holding the two variables of the panel
  ## digits   number of significant digits used to format the correlation
  ## prefix   text placed in front of the formatted value
  ## cex.cor  character expansion of the text; when missing it is derived from
  ##          the width of the text so that the value fits the panel
  ## ...      further arguments forwarded by pairs(); accepted and ignored so
  ##          that extra graphical parameters do not make the panel fail
  panel.cor <- function(x, y, digits = 4, prefix = "", cex.cor, ...)
  {
    ## Work in a unit square and put the user coordinates back afterwards.
    ## par() needs the value by name: an unnamed numeric vector restores
    ## nothing and only raises a warning.
    usr <- par("usr")
    on.exit(par(usr = usr))
    par(usr = c(0, 1, 0, 1))

    r <- abs(cor(x, y, method = "pearson"))
    ## formatting together with a long decimal forces `digits` to be honoured
    txt <- format(c(r, 0.123456789), digits = digits)[1]
    txt <- paste(prefix, txt, sep = "")

    ## A caller supplied cex.cor is used as is (it used to leave the text size
    ## undefined and fail)
    if (missing(cex.cor)) {
      cex.cor <- 0.6 / strwidth(txt)
    }
    text(0.5, 0.5, txt, cex = cex.cor)
  }
  ## Lower triangle of every pairs() plot: scatter plot with a smoothed line,
  ## coloured with the per-center palette.
  lowerPanel <- function(...) panel.smooth(..., col = colors)

  if (!(args[5] == .EXP_TIMES_TR_RA && args[12] == .KMEANS_CLUSTER_DIFF_MAT)) {
    ## One matrix with all the columns of the centers
    pairs(result, lower.panel = lowerPanel, upper.panel = panel.cor)
  } else {
    ## Two matrices, drawn one after the other. The columns are split at the
    ## number of input column names that match .RA_PATTERN
    ## (case-insensitively).
    firstColOtherExp <- length(grep(.RA_PATTERN, toupper(names(data))))
    allRows <- 1:nrow(result)

    ## first group of columns, drawn with the "upper" outer margins
    firstGroup <- result[allRows, 1:firstColOtherExp]
    pairs(firstGroup,
          lower.panel = lowerPanel, upper.panel = panel.cor, oma = omaUpper)

    ## remaining columns, drawn with the "lower" outer margins
    secondGroup <- result[allRows, (firstColOtherExp + 1):ncol(result)]
    pairs(secondGroup,
          lower.panel = lowerPanel, upper.panel = panel.cor, oma = omaLower)
  }
} else if (args[7] == .VOUT_TYPE_HEATMAP) {
  ## Palette of the heatmap, selected by args[11]. An unknown value is reported
  ## right here with a clear message; it used to leave the palette undefined
  ## and fail later inside the heatmap.2 call.
  colorMapK <- if (args[11] == .HEATMAP_COL_GREEN_RED) {
    greenred(.HEATMAP_NUM_COLS_INTERVAL)
  } else if (args[11] == .HEATMAP_COL_YELLOW_BLUE) {
    colorpanel(.HEATMAP_NUM_COLS_INTERVAL, 'yellow', 'black', 'blue')
  } else {
    stop("Unknown heatmap color scheme: ", args[11], call. = FALSE)
  }

  ## Draw the centers as they are: rows and columns are not reordered and no
  ## dendrogram is drawn. lmat/lwid/lhei set the layout of the heatmap.2
  ## components.
  heatmap.2(data.matrix(result),
            Rowv = FALSE,
            Colv = FALSE,
            dendrogram = "none",
            distfun = function(x) {x},
            hclustfun = function(x) {x},
            lmat = rbind(c(3, 4, 0), c(2, 1, 1)),
            lwid = c(0.2, 1, 0.5),
            lhei = c(0.18, 0.5),
            trace = 'none',
            na.color = 'black',
            col = colorMapK)
}


#plot(mds, col = mds$cluster)
## Close the graphics device so the output file(s) are flushed to disk
dev.off()

## A PNG/JPG cluster print of TR and RA data was drawn as separate page files
## (<name>.temp.<page>); stack them into the final image with ImageMagick's
## montage and remove the pages afterwards.
if (args[7] == .VOUT_TYPE_CLUSTER && args[5] == .EXP_TIMES_TR_RA &&
    args[8] != .VOUT_FORMAT_SVG) {
  ## The pages are listed from R and every path is quoted, so names with
  ## spaces or shell metacharacters cannot break (or extend) the command.
  tempPages <- sort(Sys.glob(paste0(fileRGrNameFull, '.temp.*')))
  if (length(tempPages) == 0) {
    stop("No temporary page files found for ", fileRGrNameFull, call. = FALSE)
  }
  montageCommand <- paste('montage',
                          paste(shQuote(tempPages), collapse = ' '),
                          '-background none -tile 1x2 -geometry +0+0',
                          shQuote(fileRGrNameFull))
  montageStatus <- system(montageCommand)
  ## Keep the pages and report the failure instead of deleting the only copy
  ## of the plots and announcing success.
  if (montageStatus != 0) {
    stop("montage failed with exit status ", montageStatus,
         "; the page files were kept", call. = FALSE)
  }
  unlink(tempPages)
}

## Reaching this line means every step above ran, so the execution is
## successful: print the success message expected by the caller
print(args[4])

