###################################################################################################
## readExcelCountTable function definition
## =======================================
##
## Purpose: data manipulation function for converting Excel file with multiple spreadsheets
##          to database-like dataframe
## 
## Arguments:
##    - excel.file.name: a character containing the path to the Excel file.
##	   - tree.description.sheet.name: an optional character containing the name of the Excel file sheet
##        containing tree description (by default: "Tree description").
##		- sampling.description.sheet.name: an optional character containing the name of the Excel file sheet,
##        which contains the correspondence between samples and dates (by default: "Sampling description").
##
## Output:
##		- a data.frame in database-like format with cell count data and information about trees and dates
##
## Versions:
##  1.1. Using gdata
##	   1.2. Taking into account previous ring measurement
##	   1.3. Adding the year if asked (optional)
##    1.4-1. Removing the year option and correcting a bug concerning P
##  2.1. Reading additional information in additional sheets to build final data file
##    2.1-0. Changing the name of the output variables for Ca, Ea, etc.
##    2.1-1. Changing the name of the output variables for CZ, EZ, etc.
##    2.1-2. Changing the name of the output variables for CZ, EZ, WZ, MZ, PR.
##    2.1-3. Ordering qualitative variable (Site, Year, Species, Tree)
##
## Started: 13 October 2010
## Last modified: 10 January 2017
## Author: Cyrille RATHGEBER - LERFoB UMR1092 - INRA Nancy
##
###################################################################################################

readExcelCountTable <- function(excel.file.name,
                                tree.description.sheet.name = "Tree description",
                                sampling.description.sheet.name = "Sampling description") {

  ## Purpose: read an Excel workbook made of one tree description sheet, one
  ##   sampling description sheet and one cell count sheet per tree (named
  ##   "Tree <Tree>"), and stack everything in a single long data.frame.
  ##
  ## Arguments:
  ##   - excel.file.name: path to the Excel file.
  ##   - tree.description.sheet.name: name of the sheet holding the columns
  ##       Site, Tree, Species (compulsory) and Age, Diameter, Height
  ##       (optional, filled with NA and a warning when missing).
  ##   - sampling.description.sheet.name: name of the sheet holding the
  ##       compulsory columns Year, Sample and DY.
  ##
  ## Output:
  ##   - a data.frame with the columns Site, Year, Species, Tree, Sample, DY,
  ##     RF, CZ, EZ, WZ, MZ, PR: one row per sample and per radial file
  ##     (RF = 1, 2, 3). Rows where CZ, EZ, WZ and MZ are all NA are dropped,
  ##     and rows are ordered by Site, Year, Species, Tree, Sample, RF.
  ##
  ## Errors:
  ##   - stops when a compulsory column is missing from any sheet, or when the
  ##     tree description sheet lists no tree.

  ## ------------------------
  ## Loading required library
  ## ------------------------
  library(gdata)


  ## -------------
  ## Local helpers
  ## -------------

  ## as.integer() / as.numeric() applied to a factor return the internal level
  ## codes, not the values; going through character first gives the values.
  unfactor <- function(x) {
    if (is.factor(x)) as.character(x) else x
  }
  toInteger <- function(x) as.integer(unfactor(x))
  toNumeric <- function(x) as.numeric(unfactor(x))

  ## Stacks the columns <prefix>1, <prefix>2, <prefix>3 of df (one column per
  ## radial file) in a single vector, converted with convert.
  stackRadialFiles <- function(df, prefix, convert) {
    columns <- lapply(paste0(prefix, 1:3), function(col) unfactor(df[[col]]))
    convert(unlist(columns, use.names = FALSE))
  }


  ## -----------------------------------------------------
  ## Reading tree description sheet in the input data file
  ## -----------------------------------------------------

  message("--> Reading of tree description sheet... \r")
  TreeDF <- read.xls(excel.file.name, sheet = tree.description.sheet.name)

  ## --> Checking that all the variables needed are present in the input file
  ##     and aborting or sending a warning if it is not the case,
  ##     + forcing them to the right format if they are present
  ## ------------------------------------------------------------------------

  ## Column names as read from the file, i.e. before any column is added below
  TreeDF.col.names <- names(TreeDF)

  ## Compulsory Site column
  if (!("Site" %in% TreeDF.col.names)) {
    stop("Site index not found in tree description sheet --> Reading aborted!")
  }
  TreeDF$Site <- as.factor(TreeDF$Site)

  ## Compulsory Tree column
  if (!("Tree" %in% TreeDF.col.names)) {
    stop("Tree column not found in tree description sheet --> Reading aborted!")
  }
  TreeDF$Tree <- as.factor(TreeDF$Tree)

  ## Compulsory Species column
  if (!("Species" %in% TreeDF.col.names)) {
    stop("Species column not found in tree description sheet --> Reading aborted!")
  }
  TreeDF$Species <- as.factor(TreeDF$Species)

  ## Requested Age column
  ## (NA is replicated to the number of rows so that an empty sheet is accepted)
  if (!("Age" %in% TreeDF.col.names)) {
    warning("Age column not found in tree description sheet -->
            corresponding column is filled with NA, reading continue...")
    TreeDF$Age <- rep(NA_integer_, nrow(TreeDF))
  }
  TreeDF$Age <- toInteger(TreeDF$Age)

  ## Requested Diameter column
  if (!("Diameter" %in% TreeDF.col.names)) {
    warning("Diameter column not found in tree description sheet -->
            corresponding column is filled with NA, reading continue...")
    TreeDF$Diameter <- rep(NA_real_, nrow(TreeDF))
  }
  TreeDF$Diameter <- toNumeric(TreeDF$Diameter)

  ## Requested Height column
  if (!("Height" %in% TreeDF.col.names)) {
    warning("Height column not found in tree description sheet -->
            corresponding column is filled with NA, reading continue...")
    TreeDF$Height <- rep(NA_real_, nrow(TreeDF))
  }
  TreeDF$Height <- toNumeric(TreeDF$Height)

  ## Remark column
  if ("Remark" %in% TreeDF.col.names) {
    message("RM. Please note that remark column from tree description sheet will not be incorporated
             in the final table...")
  }

  TreeDF <- TreeDF[c("Site", "Tree", "Species", "Age", "Diameter", "Height")]

  message("...Reading of tree description sheet done. \n")


  ## ---------------------------------------------------------
  ## Reading sampling description sheet in the input data file
  ## ---------------------------------------------------------

  message("--> Reading of sampling description sheet... \r")
  SamplingDF <- read.xls(excel.file.name, sheet = sampling.description.sheet.name)

  ## --> Checking that all the compulsory variables are present in the input
  ##     file and aborting if it is not the case,
  ##     + forcing them to the right format if they are present
  ## -----------------------------------------------------------------------

  SamplingDF.col.names <- names(SamplingDF)

  ## Compulsory Year column
  if (!("Year" %in% SamplingDF.col.names)) {
    stop("Year column not found in sampling description sheet --> Reading aborted!")
  }
  SamplingDF$Year <- toInteger(SamplingDF$Year)

  ## Compulsory Sample column
  if (!("Sample" %in% SamplingDF.col.names)) {
    stop("Sample column not found in sampling description sheet --> Reading aborted!")
  }
  SamplingDF$Sample <- toInteger(SamplingDF$Sample)

  ## Compulsory DY column
  if (!("DY" %in% SamplingDF.col.names)) {
    stop("DY column not found in sampling description sheet --> Reading aborted!")
  }
  SamplingDF$DY <- toInteger(SamplingDF$DY)

  ## Remark column (looked for in the sampling sheet, not in the tree sheet)
  if ("Remark" %in% SamplingDF.col.names) {
    message("RM. Please note that remark column from sampling description sheet will not be incorporated in the final table...")
  }

  SamplingDF <- SamplingDF[c("Year", "Sample", "DY")]

  message("... Reading of sampling description sheet done. \n")


  ## -----------------------------------------------------
  ## Reading cell count data sheets in the input data file
  ## -----------------------------------------------------

  ## One sheet per distinct tree: when a tree is listed several times in the
  ## tree description sheet, its first row provides Site and Species, so that
  ## the sheet name and the tree metadata always refer to the same row.
  tree.rows <- which(!duplicated(TreeDF$Tree))
  n.sheets <- length(tree.rows)
  if (n.sheets == 0) {
    stop("No tree found in tree description sheet --> Reading aborted!")
  }

  ## Names of the data sheets to read (plain character, one per description row)
  cell.count.sheet.names <- paste("Tree", TreeDF$Tree)

  ## Compulsory count columns of a cell count sheet, by type of count
  zone.columns <- list("cambial cell count"       = c("C1", "C2", "C3"),
                       "enlarging cell count"     = c("E1", "E2", "E3"),
                       "thickening cell count"    = c("W1", "W2", "W3"),
                       "mature cell count"        = c("M1", "M2", "M3"),
                       "previous ring cell count" = c("P1", "P2", "P3"))

  ## The table of each sheet is stored in a list and everything is bound once
  ## after the loop, instead of growing a data.frame at every iteration.
  sheet.tables <- vector("list", n.sheets)

  ## Loop for reading successively the different sheets of the input excel file
  message("--> Reading cell count data sheets... \r")

  for (k in seq_len(n.sheets)) {

    i <- tree.rows[k]
    sheet.name <- cell.count.sheet.names[i]

    ## Reading the input data file
    message(paste0(k, ".", "\t --> Reading data for tree ", TreeDF$Tree[i],
                   " in cell count sheet ", sheet.name, "... \r"))
    InDF <- read.xls(excel.file.name, sheet = sheet.name)

    ## --> Checking that all the variables needed are present and aborting
    ##     if it is not the case
    InDF.col.names <- names(InDF)

    ## Compulsory Sample column
    if (!("Sample" %in% InDF.col.names)) {
      stop(paste0("Sample column not found in ", sheet.name,
                  " cell count sheet --> reading aborted!"))
    }

    ## Compulsory count columns (C, E, W, M and P, three radial files each)
    for (zone in names(zone.columns)) {
      cols <- zone.columns[[zone]]
      if (!all(cols %in% InDF.col.names)) {
        stop(paste0(cols[1], ", ", cols[2], " or ", cols[3], " (i.e. ", zone,
                    ") not found in ", sheet.name,
                    " cell count sheet --> reading aborted!"))
      }
    }

    ## Merging cell count table and sampling dates table
    ## (merge() joins on the columns the two tables have in common)
    IDF <- merge(SamplingDF, InDF)
    n.obs <- nrow(IDF)

    ## Creating a "database-type" data.frame: the block of n.obs samples is
    ## repeated three times, once per radial file.
    ## rep() is used for the factors because it keeps their levels.
    sheet.tables[[k]] <- data.frame(
      Site    = rep(TreeDF$Site[i], times = 3 * n.obs),
      Year    = as.integer(rep(IDF$Year, times = 3)),
      Species = rep(TreeDF$Species[i], times = 3 * n.obs),
      Tree    = rep(TreeDF$Tree[i], times = 3 * n.obs),
      Sample  = as.integer(rep(IDF$Sample, times = 3)),
      DY      = as.integer(rep(IDF$DY, times = 3)),
      RF      = factor(rep(1:3, each = n.obs), levels = 1:3),
      CZ      = stackRadialFiles(IDF, "C", as.integer),
      EZ      = stackRadialFiles(IDF, "E", as.integer),
      WZ      = stackRadialFiles(IDF, "W", as.integer),
      MZ      = stackRadialFiles(IDF, "M", as.integer),
      PR      = stackRadialFiles(IDF, "P", as.numeric)
    )

  } ## End for
  message(paste("... Reading of ", n.sheets, " cell count sheets done. \n"))

  ## Aggregating the tables of all the data sheets
  AggDF <- do.call(rbind, sheet.tables)


  ## Removing blank lines in the output table
  ## ----------------------------------------
  ## A line is blank when CZ, EZ, WZ and MZ are all missing (PR is not tested)
  count.columns <- c("CZ", "EZ", "WZ", "MZ")
  is.blank <- rowSums(!is.na(AggDF[count.columns])) == 0
  ODF <- AggDF[!is.blank, ]


  ## Returning the ordered data-base like table
  ## ------------------------------------------
  ODF[order(ODF$Site, ODF$Year, ODF$Species, ODF$Tree, ODF$Sample, ODF$RF), ]

} ## End function readExcelCountTable

###################################################################################################
##                           End readExcelCountTable function
###################################################################################################
