## ************************************************************************************************
## computeCriticalDates function definition
## ------------------------------------------------------------------------------------------------
##
## Computing wood formation phenology critical dates
## 
## Arguments:
##   - data: data.frame with imposed column names:
##      Site, Year, Tree, Species, Sample, DY, RF, CZ, EZ, WZ, MZ, PR
##   - plot: TRUE (or FALSE) if output plot (pdf) is desired (or not)
##
## Output:
##		- dataframe of critical dates and duration with associated 95% CI
##		- pdf file with the verification plots (optional)
##
##
## Versions:
##		1. Linear interpolation
##		2. Binary transformation of input data
##		3. Logistic regression
##		4. Confidence interval computation
##			4.1. Values in plain active phase forced to 1
##				4.1-1. Bug concerning plot subtitle corrected
##				4.1-2. Replacing aberrant value by NA
##				4.1-3. Bug concerning tree number handling corrected (12/12/2011)
##				4.1-4. Missing data handling improvement (16, 19, 21/12/2011)
##       4.2. Cleaning without changing the structure
##          4.2-1. Variable names homogeneisation (01/09/2016)
##
## Started: 21 October 2009
## Last modifications: 07 September 2016
## Author: Cyrille RATHGEBER - INRA Nancy
##
## ------------------------------------------------------------------------------------------------

computeCriticalDates <- function(data, plot=TRUE) {
   
   #message("--> Entering computeCriticalDates function...")
   
   ## =============================================================================================   
   ## Declarations and settings
   ## =============================================================================================
   ## Formating variables
   ## -------------------
   IDF  <- data ## IDF: Input Data Frame
   IDF$Tree <- factor(IDF$Tree, exclude=NULL)
	
	## Defining beginning and cessation of xylem differentiation phases
	## ----------------------------------------------------------------
	Tree <- character()  # ???
	bE <- numeric() 	   ## bE: beginning of Enlargement phase
	bE.ci <- numeric() 	## 95% confidence interval for bE
	cE <- numeric()	   ## cE: cessation of Enlargement phase
	cE.ci <- numeric()   ## 95% confidence interval for cE
	bW <- numeric()      ## bW: beginning of Wall thickening and lignification phase
	bW.ci <- numeric()   ## 95% confidence interval for bW
	cW <- numeric()	   ## cW: cessation of Wall thickening and lignification phase
	cW.ci <- numeric()   ## 95% confidence interval for cW
	bM <- numeric()	   ## bM: beginning of Mature phase
	bM.ci <- numeric()	## 95% confidence interval for bM
	
	## General settings for the verification plots
	## -------------------------------------------
	Verification.plot.file.name <- paste("Verification plots for computation of critical dates - ", Sys.Date(), ".pdf", sep="")
	pdf(file = Verification.plot.file.name)

		
	## =============================================================================================
	## Loop for computing critical dates for each tree individualy
	## =============================================================================================

	i <- 0
	
	for(t in levels(IDF$Tree)) {
	   
	   ## ---------------------------------------------------------------------------------------------
	   ## Data manipulation
	   ## =============================================================================================
		i <- i + 1
		Tree[i] <- t
		TDF <- IDF[IDF$Tree==t, ] ## TDF: individual Tree Data Frame

		## Binarization of EZ, WZ & MZ
		## ---------------------------
		BDF <- TDF ## BDF: individual tree Binary Data Frame
		BDF$E <- ifelse(BDF$EZ > 0.5, 1, 0)
		BDF$W <- ifelse(BDF$WZ > 0.5, 1, 0)
		BDF$M <- ifelse(BDF$MZ > 0.5, 1, 0)

		## Computing the mean for each dates
		## ---------------------------------
		BDF$DY <- as.factor(BDF$DY)
		MDF <- aggregate(BDF[, c("E", "W", "M")],
			by=list(BDF$Tree, BDF$DY), FUN=mean, na.rm=TRUE)
		names(MDF) <- c("Tree", "DY", "E", "W", "M") ## MDF: individual tree aggregated (Mean) binary Data Frame

		## Rounding means
		MDF[, c("E", "W", "M")] <- round(MDF[, c("E", "W", "M")], digits=2)

		## Ordering mean data frame
		MDF <- MDF[order(MDF$Tree, MDF$DY), ]
		MDF$DY <- as.numeric(levels(MDF$DY))[MDF$DY]
		BDF$DY <- as.numeric(levels(BDF$DY))[BDF$DY]
		

		## ---------------------------------------------------------------------------------------------
		## Enlarging phase
		## =============================================================================================
		## Checking if there is any values available for E
		## ===============================================
		## If no --> attribute NA to critical dates
		if (length(na.omit(MDF$E)) == 0){
			bE[i] <-  NA
			bE.sd[i] <- NA
			cE[i] <-  NA
			cE.sd[i] <- NA	
		}
		
		## If yes --> compute critical dates
		else {
			## Delimiting the plain active phase for Enlargement
		   ## =================================================
			datE <- MDF$DY[MDF$E > 0.99]
			datE.net <- na.omit(datE)		
			datE1 <- datE.net[1]
			datE2 <- datE.net[length(datE.net)]
		
			## spliting the dataset into begining and end
			EDF.b <- BDF[BDF$DY <= datE2, c("DY", "E")] ## EDF.b: individual tree binary Data Frame for E at the begining
			EDF.c <- BDF[BDF$DY >= datE1, c("DY", "E")] ## EDF.c: individual tree binary Data Frame for E at the end
		
			## Forcing plain active phase values to 1
			## --> in order to ensure that begining and end are correctly computed!
			EDF.b[EDF.b$DY >= datE1, "E"] <- 1
			EDF.c[EDF.c$DY <= datE2, "E"] <- 1

			## Computing the beginning using a logistic regression
			## ===================================================
			DY <- EDF.b$DY
			E <- EDF.b$E
			
			## Looking for missing values
			## --------------------------
			EDF.bm <- aggregate(EDF.b[, c("E")], by=list(EDF.b$DY), FUN=mean, na.rm=TRUE)
			names(EDF.bm) <- c("DY", "E.m")
			MV <- EDF.bm[is.na(EDF.bm$E.m) == TRUE, ]
			if (nrow(MV) >= 1) MV$E <- 0.5

			## Applying GLM
			## ------------	
			model.bE <- glm(E ~ DY, binomial)
			bE[i] <- as.numeric(round(-model.bE$coef[1] / model.bE$coef[2], digits=0))
			
			## Replacing NA by (O, 1) couple to compute CI more realistacally, if needed
			if (bE[i] >= min(MV$DY) & bE[i] <= max(MV$DY)) {
				DY <- c(DY, rep(MV$DY, 2))
				E <- c(E, rep(0, length(MV$DY)), rep(1, length(MV$DY)))
				model.bE <- glm(E ~ DY, binomial)
				bE[i] <- as.numeric(round(-model.bE$coef[1] / model.bE$coef[2], digits=0))
			}
			
			## Estimating the confidence interval
			bE1 <- as.numeric((log(97.5/2.5) - model.bE$coef[1]) / model.bE$coef[2])
			bE2 <- as.numeric((log(2.5/97.5) - model.bE$coef[1]) / model.bE$coef[2])
			bE.ci[i] <- round(abs(bE1 - bE2), digits=1)


			## Plot the results for the beginning if option selected
			## ========================================================
			if(plot==TRUE) {
				plot(DY, E, type="n", ann=FALSE)
				abline(h=c(0.025, 0.5, 0.975), col="grey", lty=2)
				xp <- c(bE1, bE2, bE2, bE1)
				yp <- c(0.025, 0.025, 0.975, 0.975)
				polygon(xp, yp, density=NA, col="yellow", border="yellow")
				x <- seq(min(DY), max(DY), 1)
				y <- predict(model.bE, list(DY=x), type="response")
				lines(x, y, col="blue")
				points(EDF.b$DY, EDF.b$E, type="p", pch=1, col="blue")	
				points(bE[i], 0.5, pch=10, col="darkgreen", cex=2)
				points(MV$DY, MV$E, type="p", pch = 4, col="red")

				## Writting additionnal labels on the plot
				title(paste("Tree", t, " - bE"))
				mtext(paste("bE = ", bE[i], " +/- ", bE.ci[i]/2, "days"), side=3, line=0.5, adj=0)
				mtext("Day of Year", side=1, line=2)
				mtext("Probability", side=2, line=2)
				
				## Adding CaviaR stamp on the figure
				mtext("CaviaR.2 - computeCriticalDates function",
				      side=1, line=3, adj=1, font=2, family="HersheySans", cex=0.5)
			} ## end if
			

			## Computing the end using a logistic regression
			## ========================================================
			DY <- EDF.c$DY
			E <- EDF.c$E
			EDF.cm <- aggregate(EDF.c[, c("E")], by=list(EDF.c$DY), FUN=mean, na.rm=TRUE)
			names(EDF.cm) <- c("DY", "E.m")
			MV <- EDF.cm[is.na(EDF.cm$E.m)==TRUE, ]
			if (nrow(MV) >=1) MV$E <- 0.5

			model.cE <- glm(E ~ DY, binomial)
			cE[i] <- as.numeric(round(-model.cE$coef[1] / model.cE$coef[2], digits=0))
			
			## Replacing NA by (O, 1) couple to compute SD more realistacally, if needed
			if (cE[i] >= min(MV$DY) & cE[i] <= max(MV$DY)) {
				DY <- c(DY, rep(MV$DY, 2))
				E <- c(E, rep(0,length(MV$DY)), rep(1,length(MV$DY)))
				model.cE <- glm(E ~ DY, binomial)
				bE[i] <- as.numeric(round(-model.cE$coef[1] / model.cE$coef[2], digits=0))
			}
			
			## Estimating the standard error			
			cE1 <- as.numeric((log(97.5/2.5) - model.cE$coef[1]) / model.cE$coef[2])
			cE2 <- as.numeric((log(2.5/97.5) - model.cE$coef[1]) / model.cE$coef[2])
			cE.ci[i] <- round(abs(cE1 - cE2), digits=1)

			## plot the results for the end if option selected
			## ===============================================
			if(plot==TRUE) {
				plot(DY, E, type="n", ann=FALSE)
				abline(h=c(0.025, 0.5, 0.975), col="grey", lty=2)
				xp <- c(cE1, cE2, cE2, cE1)
				yp <- c(0.025, 0.025, 0.975, 0.975)
				polygon(xp, yp, density=NA, col="yellow", border="yellow")
				x <- seq(min(DY), max(DY), 1)
				y <- predict(model.cE, list(DY=x), type="response")
				lines(x, y, col="blue")
				points(EDF.c$DY, EDF.c$E, type="p", pch=1, col="blue")
				points(cE[i], 0.5, pch=10, col="darkgreen", cex=2)
				points(MV$DY, MV$E, type="p", pch = 4, col="red")

				## Writting additionnal labels on the plot
				title(paste("Tree", t, " - cE"))
				mtext(paste("cE = ", cE[i], " +/- ", cE.ci[i]/2, "days"), side=3, line=0.5, adj=0)
				mtext("Day of Year", side=1, line=2)
				mtext("Probability", side=2, line=2)
				
				## Adding CaviaR stamp on the figure
				mtext("CaviaR.2 - computeCriticalDates function",
				      side=1, line=3, adj=1, font=2, family="HersheySans", cex=0.5)
			} # end if plot
		} ## end else compute critical dates enlarging phase


		## ---------------------------------------------------------------------------------------------		
		## Wall thickening phase
		## =============================================================================================
		## Checking if there is any values for W
		if (length(na.omit(MDF$W)) == 0){
			bW[i] <-  NA
			bW.sd[i] <- NA
			cW[i] <-  NA
			cW.sd[i] <- NA
		}
		else {
			## Delimiting the plain active phase for L
		   ## =======================================
			datW <- MDF$DY[MDF$W > 0.99]
			datW.net <- na.omit(datW)
			datW1 <- datW.net[1]
			datW1 <- datW[1]
			datW2 <- datW.net[length(datW.net)]

			## spliting dataset
			WDF.b <- BDF[BDF$DY <= datW2, c("DY", "W")]
			WDF.c <- BDF[BDF$DY >= datW1, c("DY", "W")]
		
			## Forcing plain active phase values to 1
			## --> in order to ensure that begining and end are correctly computed!
			WDF.b[WDF.b$DY >= datW1, "W"] <- 1
			WDF.c[WDF.c$DY <= datW2, "W"] <- 1

			## Computing for the beginning using a logistic regression
			## =======================================================
			DY <- WDF.b$DY
			W <- WDF.b$W
			WDF.bm <- aggregate(WDF.b[, c("W")], by=list(WDF.b$DY), FUN=mean, na.rm=TRUE)
			names(WDF.bm) <- c("DY", "W.m")
			MV <- WDF.bm[is.na(WDF.bm$W)==TRUE, ]
			if (nrow(MV) >=1) MV$W <- 0.5
		
			model.bW <- glm(W ~ DY, binomial)
			bW[i] <- as.numeric(round((log(1) - model.bW$coef[1]) / model.bW$coef[2], digits=0))
			
			## Replacing NA by (O, 1) couple to compute SD more realistacally, if needed
			if (bW[i] >= min(MV$DY) & bW[i] <= max(MV$DY)) {
				DY <- c(DY, rep(MV$DY, 2))
				W <- c(W, rep(0, length(MV$DY)), rep(1, length(MV$DY)))
				model.bW <- glm(W ~ DY, binomial)
				bW[i] <- as.numeric(round(-model.bW$coef[1] / model.bW$coef[2], digits=0))
			}
			
			## Estimating the standard error
			bW1 <- as.numeric((log(97.5/2.5) - model.bW$coef[1]) / model.bW$coef[2])
			bW2 <- as.numeric((log(2.5/97.5) - model.bW$coef[1]) / model.bW$coef[2])
			bW.ci[i] <- round(abs(bW1 - bW2), digits=1)

			## plot the results for the beginning if option selected
			if(plot==TRUE) {
				plot(DY, W, type="n", ann=FALSE)
				abline(h=c(0.025, 0.5, 0.975), col="grey", lty=2)
				xp <- c(bW1, bW2, bW2, bW1)
				yp <- c(0.025, 0.025, 0.975, 0.975)
				polygon(xp, yp, density=NA, col="yellow", border="yellow")
				x <- seq(min(DY), max(DY), 1)
				y <- predict(model.bW, list(DY=x), type="response")
				lines(x, y, col="blue")
				points(DY, W, type="p", pch=1, col="blue")
				points(bW[i], 0.5, pch=10, col="darkgreen", cex=2)
				points(MV$DY, MV$W, type="p", pch = 4, col="red")

				## Writting additionnal labels on the plot
				title(paste("Tree", t, " - bW"))
				mtext(paste("bW = ", bW[i], " +/- ", bW.ci[i]/2, "days"), side=3, line=0.5, adj=0)
				mtext("Day of Year", side=1, line=2)
				mtext("Probability", side=2, line=2)
				
				## Adding CaviaR stamp on the figure
				mtext("CaviaR.2 - computeCriticalDates function",
				      side=1, line=3, adj=1, font=2, family="HersheySans", cex=0.5)
			} ## end if

			## Computing the end using a logistic regression
			## =============================================
			DY <- WDF.c$DY
			W <- WDF.c$W
			WDF.cm <- aggregate(WDF.c[, c("W")], by=list(WDF.c$DY), FUN=mean, na.rm=TRUE)
			names(WDF.cm) <- c("DY", "W.m")
			MV <- WDF.cm[is.na(WDF.cm$W)==TRUE, ]
			if (nrow(MV) >=1) MV$W <- 0.5

			model.cW <- glm(W ~ DY, binomial)
			cW[i] <- as.numeric(round((log(1) - model.cW$coef[1]) / model.cW$coef[2], digits=0))
			
			## Replacing NA by (O, 1) couple to compute SD more realistacally, if needed
			if (cW[i] >= min(MV$DY) & cW[i] <= max(MV$DY)) {
				DY <- c(DY, rep(MV$DY, 2))
				W <- c(W, rep(0, length(MV$DY)), rep(1, length(MV$DY)))
				model.cW <- glm(W ~ DY, binomial)
				cW[i] <- as.numeric(round(-model.cW$coef[1] / model.cW$coef[2], digits=0))
			}
			
			## Estimating the standard error
			cW1 <- as.numeric((log(97.5/2.5) - model.cW$coef[1]) / model.cW$coef[2])
			cW2 <- as.numeric((log(2.5/97.5) - model.cW$coef[1]) / model.cW$coef[2])
			cW.ci[i] <- round(abs(cW1 - cW2), digits=1)

			## plot the results for the end if option selected
			if(plot==TRUE) {
				plot(DY, W, type="n", ann=FALSE)
				abline(h=c(0.025, 0.5, 0.975), col="grey", lty=2)
				xp <- c(cW1, cW2, cW2, cW1)
				yp <- c(0.025, 0.025, 0.975, 0.975)
				polygon(xp, yp, density=NA, col="yellow", border="yellow")
				x <- seq(min(DY), max(DY), 1)
				y <- predict(model.cW, list(DY=x), type="response")
				lines(x, y, col="blue")
				points(DY, W, type="p", pch=1, col="blue")
				points(cW[i], 0.5, pch=10, col="darkgreen", cex=2)
				points(MV$DY, MV$W, type="p", pch=4, col="red")

				## Writting additionnal labels on the plot
				title(paste("Tree", t, " - cW"))
				mtext(paste("cW = ", cW[i], " +/- ", cW.ci[i]/2, "days"), side=3, line=0.5, adj=0)
				mtext("Day of Year", side=1, line=2)
			   mtext("Probability", side=2, line=2)
			   
			   ## Adding CaviaR stamp on the figure
			   mtext("CaviaR.2 - computeCriticalDates function",
			         side=1, line=3, adj=1, font=2, family="HersheySans", cex=0.5)
			} ## end if
		} ## end else

		## ---------------------------------------------------------------------------------------------
		## Mature phase
		## =============================================================================================
		## Checking if there is any values for M
		if (length(na.omit(MDF$M)) == 0){
			bM[i] <-  NA
			bM.sd[i] <- NA
		}
		
		else {
			## spliting dataset
			MDF.b <- BDF[, c("DY", "M")]

			## Computing the beginning using a logistic regression
			## ===================================================
			DY <- MDF.b$DY
			M <- MDF.b$M
			MDF.bm <- aggregate(MDF.b[, c("M")], by=list(MDF.b$DY), FUN=mean, na.rm=TRUE)
			names(MDF.bm) <- c("DY", "M.m")
			MV <- MDF.bm[is.na(MDF.bm$M)==TRUE, ]
			if (nrow(MV) >=1) MV$M <- 0.5

			model.bM <- glm(M ~ DY, binomial)
			bM[i] <- as.numeric(round((log(1) - model.bM$coef[1]) / model.bM$coef[2], digits=0))
			
			## Replacing NA by (O, 1) couple to compute SD more realistacally, if needed
			if (bM[i] >= min(MV$DY) & bM[i] <= max(MV$DY)) {
				DY <- c(DY, rep(MV$DY, 2))
				M <- c(M, rep(0, length(MV$DY)), rep(1, length(MV$DY)))
				model.bM <- glm(M ~ DY, binomial)
				bM[i] <- as.numeric(round(-model.bM$coef[1] / model.bM$coef[2], digits=0))
			}
			
			## Estimating the standard error
			bM1 <- as.numeric((log(97.5/2.5) - model.bM$coef[1]) / model.bM$coef[2])
			bM2 <- as.numeric((log(2.5/97.5) - model.bM$coef[1]) / model.bM$coef[2])
			bM.ci[i] <- round(abs(bM1 - bM2), digits=1)

			## Plot the results for the beginning if option selected
			## =====================================================
			if(plot==TRUE) {
				plot(DY, M, type="n", ann=FALSE)
				abline(h=c(0.025, 0.5, 0.975), col="grey", lty=2)
				xp <- c(bM1, bM2, bM2, bM1)
				yp <- c(0.025, 0.025, 0.975, 0.975)
				polygon(xp, yp, density=NA, col="yellow", border="yellow")
				x <- seq(min(DY), max(DY), 1)
				y <- predict(model.bM, list(DY=x), type="response")
				lines(x, y, col="blue")
				points(DY, M, type="p", pch=1, col="blue")
				points(bM[i], 0.5, pch=10, col="darkgreen", cex=2)
				points(MV$DY, MV$M, type="p", pch = 4, col="red")

				## Writting additionnal labels on the plot
				title(paste("Tree", t, " - bM"))
				mtext(paste("bM = ", bM[i], " +/- ", bM.ci[i]/2, "days"), side=3, line=0.5, adj=0)
				mtext("Day of Year", side=1, line=2)
				mtext("Probability", side=2, line=2)
				
				## Adding CaviaR stamp on the figure
				mtext("CaviaR.2 - computeCriticalDates function",
				      side=1, line=3, adj=1, font=2, family="HersheySans", cex=0.5)
			} ## end if
		} ## end else	
	} ## end of loop for computing critical dates for each tree individualy

	dev.off()


	## =============================================================================================
	## Computing durations of critical periods and associated standard deviation
	## =============================================================================================
	RDF <- data.frame(bE, bE.ci, bW, bW.ci, bM, bM.ci, cE, cE.ci, cW, cW.ci)
	names(RDF) <- c("bE", "bE.ci", "bW", "bW.ci", "bM", "bM.ci", "cE", "cE.ci", "cW", "cW.ci")
	RDF$dE <- RDF$cE - RDF$bE 	## dE: total duration of enlarging phase
	RDF$dE.ci <- round(sqrt(RDF$cE.ci^2 + RDF$bE.ci^2), digits=1)
	RDF$dW <- RDF$cW - RDF$bW 	## dW: total duration of maturing phase
	RDF$dW.ci <- round(sqrt(RDF$cW.ci^2 + RDF$bW.ci^2), digits=1)
	RDF$dX <- RDF$cW - RDF$bE 	## dX: total duration of xylem differentiation
	RDF$dX.ci <- round(sqrt(RDF$cW.ci^2 + RDF$bE.ci^2), digits=1)

	## Removing aberrant values
	## ------------------------
	abr.rm <- function(x) {
		ifelse(x >= 0 & x <= 365, x, NA)
	}
	RDF.net <- as.data.frame(apply(RDF, 2, abr.rm))
	
	## Returning the results
	## ---------------------
	ODF <-  data.frame(Tree, RDF.net)

   return(ODF)

} ## End computeCriticalDates function

## ------------------------------------------------------------------------------------------------
##                           End computeCriticalDates function
## ************************************************************************************************