## ************************************************************************************************
## computeBootstrapTest function definition
## ------------------------------------------------------------------------------------------------
##
## Purpose -> Computing unilateral bootstrap permutation tests for testing equality of mean,
##    median, or variance between two groups of individuals
## 
## Arguments:
##	   - data.group1: numerical vector containing data for the first group
##		- data.group2: numerical vector containing data for the second group
##		- ci.group1 (optional): numerical vector containing confidence intervals for the first group
##		- ci.group2 (optional): numerical vector containing confidence intervals for the second group
##		- stat (optional):   "mean" for mean comparison;
##									"Student" for mean comparison using Student's t statistic;
##									"median" for median comparison, default = median;
##									"var" for variance comparison;
##									"disp.mean" for dispersion around the mean comparison;
##									"disp.median" for dispersion around the median comparison.
##		- centring (optional): centring data (TRUE), or not (FALSE), default = FALSE
##		- iter (optional): number of iteration for the bootstrap, default = 1,000
##		- out (optional): TRUE (or FALSE) if bootstrapped series are desired (or not),
##		   default = FALSE
##		- plot (optional): TRUE (or FALSE) if plot is desired (or not), default = FALSE
##		- main (optional): title of the plot
##		- var.name (optional): name of the variable to be tested
##		- sub (optional): sub-title of the plot
##
## Outputs:
##		- A list containing the results of the test
## 	   Observed statistic of group 1 is superior to the one of group 2.
##			H0 : the statistic of the reference population 1 is equal to the one of reference population 2;
##			H1 : the statistic of th ereference population 1 is greater than the one of reference population 2.
##		- Histrogram of the bootrap distribution with the observed statistic.
##
##
## Versions:
##    1. Two sample bootstrap test
##    2. Test on median
##		3. Test on variance
##		4. Test with dispersion of critical dates
##          Warning: Dispersion is not implemented for test based on centred statistics !
##    5. General cleaning (22/09/2016)
##
## Started: 14 July 2009
## Last modifications: 22, 24, 26 September 2016
## Author: Cyrille RATHGEBER - INRA Nancy
##
## ------------------------------------------------------------------------------------------------

computeBootstrapTest <- function(data.group1, data.group2, ci.group1 = NA, ci.group2 = NA,
                                 stat = "median", centring = FALSE, iter = 1000, out = FALSE,
                                 plot = FALSE, main = "NULL", var.name = "?", sub = "NULL") {
   
   #message("--> Entering computeBootstrapTest function...")
   
   ## ============================================================================================= 
	## Associated function
   ## ============================================================================================= 	
	
	C <- function(n, p) {
		# *********************************************************************************************
		# C() function definition
		# ---------------------------------------------------------------------------------------------
		# C(n, p) compute the number of possible combinations of p elements drawn simultaneously from 
		# an ensemble of n elements.
		# *********************************************************************************************
		
		factorial(n) / (factorial(p) * factorial(n - p))
	}
	
	## =============================================================================================   
	## Declarations and settings
	## =============================================================================================
	y <- data.group1
   z <- data.group2
   y.sd <- ci.group1/2
   z.sd <- ci.group2/2
	
	
	# Removing missing values from x and y
	# ------------------------------------
	y <- y[is.na(y)==FALSE]	
	z <- z[is.na(z)==FALSE]
	
	# Constructing the common dataset
	# -------------------------------	
	x <- c(y, z)
	
	if (is.na(y.sd)==FALSE && is.na(z.sd)==FALSE) {
		Y <- numeric()
		Z <- numeric()
	
		for (i in 1: length(y)) {
			Y <- c(Y, rnorm(100, mean=y[i], sd=y.sd[i]))
			Z <- c(Z, rnorm(100, mean=z[i], sd=z.sd[i]))
		}
		x <- c(Y, Z)		
	}

	# Local variables initialization
	# ------------------------------
	V1greaterthanV2 <- TRUE
	ts.boot <- numeric(iter)	
	
	# Computing the number of possible combinations
	# ---------------------------------------------
	p <- length(y)
	q <- length(z)
	n <- p + q	
	a1 <- n + p - 1
	a2 <- n + q - 1
	b <- n - 1
	NbC <- C(a1, b) * C(a2, b) # Number of possible combinations considering the two groups
	
	## =============================================================================================   
	## Simple mean comparison, option stat = "mean"
	## =============================================================================================
	 if (stat == "mean") {
		
		test.stat <- "mean difference"
		x.lab <- "Difference between group means"
		
		# observed studied statistic for group 1 and 2
		s1 <- mean(y)
		s2 <- mean(z)

		# Computing observed difference between means
		if (s1 > s2) {
			sa <- s1
			na <- length(y)
			sb <- s2
			nb <- length(z)
		}
		else {
			V1greaterthanV2 <- FALSE
			sa <- s2
			na <- length(z)
			sb <- s1
			nb <- length(y)
		}
		ts.obs <- sa - sb
		
		if (centring == TRUE) {
			# adjusting data of group 1 and 2
			y <- y - mean(y) + mean(x)
			z <- z - mean(z) + mean(x)
			x <- c(y, z)
		}
		
		# Computing bootstrapped differences between means
		for (i in 1:iter) {
			ts.boot[i] <- mean(sample(x, na, replace=TRUE)) - mean(sample(x, nb, replace=TRUE))
		}
	}
	
	## =============================================================================================   
	## Studentized mean comparison, option stat = "Student"
	## =============================================================================================	
	if (stat == "Student") {
		
		test.stat <- "Student t"
		x.lab <- "Studentized difference between group means"
		
		# observed studied statistic for group 1 and 2
		s1 <- mean(y)
		s2 <- mean(z)

		# Computing observed difference between means
		if (s1 > s2) {
			sa <- s1
			va <- var(y)
			na <- length(y)
			sb <- s2
			vb <- var(z)
			nb <- length(z)
		}
		else {
			V1greaterthanV2 <- FALSE
			sa <- s2
			va <- var(z)
			na <- length(z)
			sb <- s1
			vb <- var(y)
			nb <- length(y)
		}
		ts.obs <- (sa - sb) / sqrt(va/na + vb/nb)
		
		if (centring == TRUE) {
			# adjusting data of group 1 and 2
			y <- y - mean(y) + mean(x)
			z <- z - mean(z) + mean(x)
			x <- c(y, z)
		}
		
		# Computing bootstrapped differences between means
		for (i in 1:iter) {
			a <- sample(x, na, replace=TRUE)
			b <- sample(x, nb, replace=TRUE)
			ts.boot[i] <- (mean(a) - mean(b)) / sqrt(var(a)/na + var(b)/nb)
		}
	}
	
	## =============================================================================================   
	## Simple median comparison, option stat = "median"
	## =============================================================================================
	if (stat == "median") {
		
		test.stat <- "median difference"
		x.lab <- "Difference between group medians"

		# observed studied statistic for group 1 and 2
		s1 <- median(y)
		s2 <- median(z)
		
		# Computing observed difference between medians
		if (s1 > s2) {
			sa <- s1
			na <- length(y)
			sb <- s2
			nb <- length(z)			
		}
		else {
			V1greaterthanV2 <- FALSE
			sa <- s2
			na <- length(z)			
			sb <- s1
			nb <- length(y)			
		}
		ts.obs <- sa - sb
		
		# Computing bootstrapped differences between medians
		for (i in 1:iter) {
			ts.boot[i] <- median(sample(x, na, replace=TRUE)) - median(sample(x, nb, replace=TRUE))
		}
	}
	
	## =============================================================================================   
	## Variance comparison, option stat = "var"
	## =============================================================================================		
	if (stat == "var") {
		
		test.stat <- "log of variance ratio (F)"
		x.lab <- "Ratio between group variance"
		
		# observed studied statistic for group 1 and 2
		s1 <- var(y)
		s2 <- var(z)
		
		# Computing observed ratio between variance
		if (s1 > s2) {
			sa <- s1
			na <- length(y)
			sb <- s2
			nb <- length(z)		
		}
		else {
			V1greaterthanV2 <- FALSE
			sa <- s2
			na <- length(z)		
			sb <- s1
			nb <- length(y)	
		}
		ts.obs <- log(sa / sb)
		
		# Computing bootstrapped ratio between variances
		for (i in 1:iter) {
			ts.boot[i] <- log(var(sample(x, na, replace=TRUE)) / var(sample(x, nb, replace=TRUE)))
		}
	}
	
	## =============================================================================================   
	## Mean dispersion comparison, option stat = "disp.mean"
	## =============================================================================================	
	if (stat == "disp.mean") {
		
		test.stat <- "dispersion around the mean ratio"
		x.lab <- "Ratio between group dispersion around the mean"
		
		# data transformation (Levene, 1960)
		y.trans <- abs(y - mean(y))
		z.trans <- abs(z - mean(z))
		
		# observed studied statistic for group 1 and 2
		s1 <- mean(y.trans)
		s2 <- mean(z.trans)
		
		# Computing observed ratio between dispersions
		if (s1 > s2) {
			sa <- s1
			na <- length(y.trans)
			sb <- s2
			nb <- length(z.trans)		
		}
		else {
			V1greaterthanV2 <- FALSE
			sa <- s2
			na <- length(z.trans)		
			sb <- s1
			nb <- length(y.trans)	
		}
		ts.obs <- sa / sb
		
		# Computing bootstrapped ratio between dispersions
		for (i in 1:iter) {
			ts.boot[i] <- mean(sample(x, na, replace=TRUE)) / mean(sample(x, nb, replace=TRUE))
		}
	}
	
	## =============================================================================================   
	## Median dispersion comparison, option stat = "disp.median"
	## =============================================================================================
	if (stat == "disp.median") {
		
		test.stat <- "dispersion around the median ratio"
		x.lab <- "Ratio between group dispersion around the median"
		
		# data transformation (Levene, 1960)
		y.trans <- abs(y - median(y))
		z.trans <- abs(z - median(z))
		
		# observed studied statistic for group 1 and 2
		s1 <- mean(y.trans)
		s2 <- mean(z.trans)		
		
		# Computing observed ratio between dispersions
		if (s1 > s2) {
			sa <- s1
			na <- length(y.trans)
			sb <- s2
			nb <- length(z.trans)		
		}
		else {
			V1greaterthanV2 <- FALSE
			sa <- s2
			na <- length(z.trans)		
			sb <- s1
			nb <- length(y.trans)	
		}
		ts.obs <- sa / sb
		
		# Computing bootstrapped ratio between dispersions
		for (i in 1:iter) {
			ts.boot[i] <- mean(sample(x, na, replace=TRUE)) / mean(sample(x, nb, replace=TRUE))
		}
	}

	## =============================================================================================   
	## Presenting the results of the test
	## =============================================================================================
	## Computing the significance level for the test
	## ---------------------------------------------
	ts <- c(ts.boot, ts.obs)
	dr <- as.integer(round(log10(iter), digits=0))
	asl <- signif(length(ts[ts > ts.obs]) / length(ts), dr)
	
	
	# Writting the results
	# --------------------
	L1 <- paste("Bootstrapped test of significance")
	L2 <- paste("Variable tested: ", var.name)
	L3 <- paste("Studied statistic (s):", stat, "; Tested statistic: ", test.stat)
	L4 <- paste("s group 1  = ", sa, " ; s group 2 = ", sb, " ; ts obs. = ", ts.obs)
	
	L5 <- " "
	if (V1greaterthanV2 == TRUE) {
		L5 <- paste("H0: s group 1 = s group 2 ; H1: s group 1 > s group 2")
	}
	if (V1greaterthanV2 == FALSE) {
		L5 <- paste("H0: s group 1 = s group 2 ; H1: s group 1 < s group 2")
	}
	
	L6 <- paste("Nb. iterations = ", iter, "; ASL = ", asl)
	
	Summary <- c(L1, L2, L3, L4, L5, L6)
	KeyValues <- c(ts.obs, iter, NbC, asl)
	names(KeyValues)  <- c("D.obs", "Nb Combi.", "Nb iter.", "ASL")

	if (out == FALSE) Output <- list(summary=Summary, ASL=asl)
	else Output  <- list(summary=Summary, ts.obs=ts.obs, iterations=iter, nb.combinations=NbC,
							ASL=asl, ts=ts)
	
	
	# Plotting the results
	# --------------------
	if (plot==TRUE) {
	
		# Drawing the simulated histogram
		H <- hist(ts, main=NULL, xlab=NULL, ylab=NULL, col="blue")

		# Drawing the observation line
		abline(v=ts.obs, col="red")
		
		maxcount <- max(H$counts)
		text(ts.obs, 2/3 * maxcount, labels="Obs.", pos=4, col="red", srt=90)
	
		## Writting plot title
		## -------------------
		if (main == "NULL") {
		   mtext(L1, side=3, line=3, adj=0.0, cex=1)
		}
		else {
		   mtext(main, side=3, line=3, adj=0.0, cex=1)
		}
		
		if (sub == "NULL") {
			mtext(L2, side=3, line=2.25, adj=0.0, cex=0.75)
			mtext(L3, side=3, line=1.5, adj=0.0, cex=0.75)
			mtext(L5, side=3, line=0.75, adj=0.0, cex=0.75)
			mtext(L6, side=3, line=0, adj=0.0, cex=0.75)
		}
		else {
			mtext(sub, side=3, line=2, adj=0.0, cex=0.75)
		}
		
		## Writting the x-axis label
		## -------------------------
		mtext(x.lab, side=1, line=2.5, adj=0.5, cex=1)
		
		## Writting the y-axis label
		## -------------------------
		mtext("Frequency", side=2, line=2.5, adj=0.5, cex=1)
		
		## Adding CaviaR stamp on the figure
		## ---------------------------------
		mtext("CaviaR.2 - computeBootstrapTest function",
		      side=1, line=3, adj=1, font=2, family="HersheySans", cex=0.5)

	}
	
	# Returning the results
	# ---------------------
	return(Output)
	
} ## End function computeBootstrapTest

## ------------------------------------------------------------------------------------------------
##                           End computeBootstrapTest function
## ************************************************************************************************
