# This function computes the annual secondary production of Cyclopoids using  the size-frequency
# method (Hynes and Coleman, 1969) using the bootstrap resampling method. The final output gives the annual
# production (yearprod; mg DM·m-2 year-1)and the Production to Biomass ratio (ratio P/B)
#
# Usage:
#   yearProd(prod.data, nintervals = 10, sampl.area = 0.067, cpi = 120)
#
# Arguments:
#   prod.data          A numeric vector with values of organisms biomasses(note: not lenghts!)
#   nintervals         Number of intervals in which the 'prod.data' values are
#                      tabulated in frequency form. It defaults to 10
#   sampl.area         Sampling area. It is used to convert the preceding
#                      frequency counts into densities. It defaults to 0.067
#   cpi                CPI value "Cohort Production Interval" (120-180 days; from Dole-Oliver et al. 2000)
#
# Value:
#   A vector of length 2 with the annual secondary production (yearprod) and the
#  Production to Biomass ratio (ratio P/B)
# Examples:
#   produc <- read.table(file="prodCICLOPB2.txt", header=T)
#   yearProd(produc[,1])  # Equivalent to  yearProd(produc[,1], cpi = 120)
#   yearProd(produc[,1], cpi=180)
#   yearProd(produc[,1], sampl.area = 0.1, cpi=(120+180)/2)

yearProd <- function(prod.data,
  nintervals = 10, sampl.area = 0.067, cpi = 120)
{
  min.prod.data <- min(prod.data)
  max.prod.data <- max(prod.data)
  histo <- hist(
    prod.data,
    breaks = seq(from=min.prod.data, to=max.prod.data, by = (max.prod.data - min.prod.data) / nintervals),
    plot = FALSE
  )
  dens <- histo$counts / sampl.area
  loss <- dens
  loss[-nintervals] <- loss[-nintervals] - loss[-1]
  pCla <- histo$mids
  pCla[-nintervals] <- 0.5 * (pCla[-nintervals] + pCla[-1])
  biom <- loss * pCla
  sumBioms <- rep(TRUE, nintervals)
  sumBioms[1] <- biom[1] >= 0
  prod.year <- sum(biom[sumBioms]) * nintervals * 365 / cpi
  result <- c(prod.year, prod.year / sum(histo$mids * dens))
  names(result) <- c("year prod","ratio P/B")
  return(result)
}


# This function generates "nb" number of bootstrap replicates of the annual production value.
# It defaults 10000 iterations.
#
# Usage:
#   bootReplicates.prod(prod.data, 
#     nintervals = 10, sampl.area = 0.067, 
#     cpi = expression(rnorm(1,mean=mean.cpi, sd=sigma.cpi)),
#     min.cpi = 120,  max.cpi = 180,  
#     mean.cpi = (min.cpi + max.cpi)/2,  
#     sigma.cpi = (max.cpi - mean.cpi) / qnorm(0.975),
#     nb = 10000)
#
# Arguments:
#   prod.data          A numeric vector with the same meaning than in function
#                      'yearProd'
#   nintervals         Number of intervals in which the 'prod.data' values are
#                      tabulated in frequency form. It defaults to 10
#   sampl.area         Sampling area. It is used to convert the preceding
#                      frequency counts into densities. It defaults to 0.067
#   cpi                an object of class 'expression' defining how CPI values
#                      are generated when each bootstrap resample is produced.
#                      It defaults to random normal values generated from 'rnorm'
#                      with mean = (min.cpi + max.cpi)/2 and
#                      sd = (max.cpi - mean.cpi) / qnorm(0.975). Other
#                      expressions are admisible, including a constant value,
#                      e.g. cpi = expression(120) or simply cpi = 120
#   min.cpi            Hypothetical minimum value of CPI (species specific).
#                      It is ignored if mean.cpi and sigma.cpi are provided
#   max.cpi            Hypothetical maximum value of CPI (species specific).
#                      It is ignored if mean.cpi and sigma.cpi are provided
#   mean.cpi           Mean of the normal distribution used to generate CPI
#                      resampling values. It defaults to (min.cpi + max.cpi)/2 
#   sigma.cpi          Standard deviation of the normal distribution used to
#                      generate CPI resampling values.
#                      It defaults to (max.cpi - mean.cpi) / qnorm(0.975) 
#   nb                 Number of bootstrap resamples
#
# Value:
#   A numeric matrix with 2 rows and nb columns, each column stands for a 
#   bootstrap replicate of values produced from function 'yearProd' 
#
# Examples:
#   produc <- read.table(file="prodCICLOPB2.txt", header=T)
#   boot.prods <- bootReplicates.prod(produc[,1])
#
#   Show first 10 bootstrap replicates:
#   boot.prods[,1:10] 
#
#   Resample generating uniform CPI values:
#   boot.prods <- bootReplicates.prod(produc[,1], cpi = expression(runif(1,120,180)))
#
#   Resample with constant (120) CPI values:
#   boot.prods <- bootReplicates.prod(produc[,1], cpi = 120)
#
# Details:
#   Note that 'cpi' argument does not have the same meaning in
#   'bootReplicates.prod' than in 'yearProd'.
#   If argument 'cpi' receives a value other than its default, arguments 
#   'min.cpi', 'max.cpi', 'mean.cpi' and 'sigma.cpi' are ignored.
#   Each resample is generated from a nonparametric bootstrap resample from
#   data 'prod.data' and a possibly random CPI value provided by argument 'cpi'

bootReplicates.prod <- function(prod.data, 
  nintervals = 10, sampl.area = 0.067, 
  cpi = expression(rnorm(1,mean=mean.cpi, sd=sigma.cpi)),
  min.cpi = 120,
  max.cpi = 180,
  mean.cpi = (min.cpi + max.cpi)/2,
  sigma.cpi = (max.cpi - mean.cpi) / qnorm(0.975),
  nb = 10000)                              
{
  # Generation of nb production bootstrap replicates with possibly random cpi
  replicate(nb, 
    yearProd(
      sample(prod.data, replace=T), 
      nintervals = nintervals, sampl.area = sampl.area, 
      cpi = eval(cpi)
    )
  )
}


# Bootstrap percentile confidence interval (CI) for year production.
# Creates a simple CI for the bootstrap generated data.
#
# Usage:
#   bootPercCI.prod(prod.data, 
#     nintervals = 10, sampl.area = 0.067, 
#     cpi = expression(rnorm(1,mean=mean.cpi, sd=sigma.cpi)),
#     min.cpi = 120,  max.cpi = 180,  
#     mean.cpi = (min.cpi + max.cpi)/2,  
#     sigma.cpi = (max.cpi - mean.cpi) / qnorm(0.975),
#     nb = 10000,
#     boot.replicates,
#     alphas = c(0.025, 0.975) 
#   )
#
# Arguments:
#   prod.data          A numeric vector with the same meaning than in function
#                      'yearProd'
#   nintervals         Number of intervals in which the 'prod.data' values are
#                      tabulated in frequency form. It defaults to 10
#   sampl.area         Sampling area. It is used to convert the preceding
#                      frequency counts into densities. It defaults to 0.067
#   cpi                an object of class 'expression' defining how CPI values
#                      are generated when each bootstrap resample is produced.
#                      It defaults to random normal values generated from 'rnorm'
#                      with mean = (min.cpi + max.cpi)/2 and
#                      sd = (max.cpi - mean.cpi) / qnorm(0.975). Other
#                      expressions are admisible, including a constant value,
#                      e.g. cpi = expression(120) or simply cpi = 120
#   min.cpi            Hypothetical minimum value of CPI (species specific).
#                      It is ignored if mean.cpi and sigma.cpi are provided
#   max.cpi            Hypothetical maximum value of CPI (species specific).
#                      It is ignored if mean.cpi and sigma.cpi are provided
#   mean.cpi           Mean of the normal distribution used to generate CPI
#                      resampling values. It defaults to (min.cpi + max.cpi)/2 
#   sigma.cpi          Standard deviation of the normal distribution used to
#                      generate CPI resampling values.
#                      It defaults to (max.cpi - mean.cpi) / qnorm(0.975) 
#   nb                 Number of bootstrap resamples
#   boot.replicates    A matrix with resampled production values. Its must have
#                      the same structure than the output of 'bootReplicates.prod'
#   alphas             The probabilities in the tails of the bootstrap 
#                      distribution. The confidence interval will have nominal
#                      1 - (alphas[1] + alphas[2]) confidence level
#
# Value:
#   A numeric matrix with 2 rows and 2 columns, the columns correspond to the 
#   same variables produced by function 'yearProd', the first row corresponds
#   to the lower confidence interval limit and the second row to the upper limit 
#
# Examples:
#   produc <- read.table(file="prodCICLOPB2.txt", header=T)
#   boot.prods <- bootReplicates.prod(produc[,1])
#
#   Show first 10 bootstrap replicates:
#   boot.prods[,1:10] 
#
#   bootPercCI.prod(boot.replicates = boot.prods)
#
#   Confidence interval resampling with uniform CPI values:
#   bootPercCI.prod(produc[,1], cpi = expression(runif(1,120,180)))
#
#   With constant (120) CPI values:
#   bootPercCI.prod(produc[,1], cpi = 120)
#
# Details:
#   If argument 'boot.replicates' is missing, the bootstrap replications are
#   generated by a call to bootReplicates.prod.
#   If argument 'boot.replicates' is provided, all arguments except 'alphas'
#   are ignored
                          
bootPercCI.prod <- function(prod.data, 
  nintervals = 10, sampl.area = 0.067, 
  cpi = expression(rnorm(1,mean=mean.cpi, sd=sigma.cpi)),
  min.cpi = 120,
  max.cpi = 180,
  mean.cpi = (min.cpi + max.cpi)/2,
  sigma.cpi = (max.cpi - mean.cpi) / qnorm(0.975),
  nb = 10000,
  boot.replicates,
  alphas = c(0.025, 0.025) 
)                              
{
  if (missing(boot.replicates)) {
    boot.replicates <- bootReplicates.prod(prod.data,
      nintervals, sampl.area, 
      cpi = cpi, mean.cpi = mean.cpi, sigma.cpi = sigma.cpi, nb = nb
    )
  }
  # Bootstrap percentile confidence intervals:
  limits <- apply(boot.replicates, 1, quantile, prob = c(alphas[1], 1 - alphas[2]))
  limits
}


# Bootstrap Bias-Corrected and accelerated (BCa) confidence interval for year production. The trouble with standard intervals is that they
#are based on an asymptotic approximation that can be quite inaccurate in practice. BCa methods produce non-parametric confidence intervals
# that are an order of magnitude more accurate than the standard. In addition, the nonparametric BCa method is unfazed by
#complicated sample spaces.    

# Usage:
#   bootBCaCI.prod(prod.data, 
#     nintervals = 10, sampl.area = 0.067, 
#     cpi = expression(rnorm(1,mean=mean.cpi, sd=sigma.cpi)),
#     min.cpi = 120,  max.cpi = 180,  
#     mean.cpi = (min.cpi + max.cpi)/2,  
#     sigma.cpi = (max.cpi - mean.cpi) / qnorm(0.975),
#     nb = 10000,
#     boot.replicates,
#     alphas = c(0.025, 0.975)
#   )
#
# Arguments:
#   prod.data          A numeric vector with the same meaning than in function
#                      'yearProd'
#   nintervals         Number of intervals in which the 'prod.data' values are
#                      tabulated in frequency form. It defaults to 10
#   sampl.area         Sampling area. It is used to convert the preceding
#                      frequency counts into densities. It defaults to 0.067
#   cpi                an object of class 'expression' defining how CPI values
#                      are generated when each bootstrap resample is produced.
#                      It defaults to random normal values generated from 'rnorm'
#                      with mean = (min.cpi + max.cpi)/2 and
#                      sd = (max.cpi - mean.cpi) / qnorm(0.975). Other
#                      expressions are admisible, including a constant value,
#                      e.g. cpi = expression(120) or simply cpi = 120
#   min.cpi            Hypothetical minimum value of CPI (species specific).
#                      It is ignored if mean.cpi and sigma.cpi are provided
#   max.cpi            Hypothetical maximum value of CPI (species specific).
#                      It is ignored if mean.cpi and sigma.cpi are provided
#   mean.cpi           Mean of the normal distribution used to generate CPI
#                      resampling values. It defaults to (min.cpi + max.cpi)/2 
#   sigma.cpi          Standard deviation of the normal distribution used to
#                      generate CPI resampling values.
#                      It defaults to (max.cpi - mean.cpi) / qnorm(0.975) 
#   nb                 Number of bootstrap resamples
#   boot.replicates    A matrix with resampled production values. Its must have
#                      the same structure than the output of 'bootReplicates.prod'
#   alphas             The probabilities in the tails of the bootstrap 
#                      distribution. The confidence interval will have nominal
#                      1 - (alphas[1] + alphas[2]) confidence level
#
# Value:
#   A numeric matrix with 2 rows and 2 columns, the columns correspond to the 
#   same variables produced by function 'yearProd', the first row corresponds
#   to the lower confidence interval limit and the second row to the upper limit 
#
# Examples:
#   produc <- read.table(file="prodCICLOPB2.txt", header=T)
#   boot.prods <- bootReplicates.prod(produc[,1])
#
#   Show first 10 bootstrap replicates:
#   boot.prods[,1:10] 
#
#   bootBCaCI.prod(produc[,1], boot.replicates = boot.prods)
#
#   Confidence interval resampling with uniform CPI values:
#   bootBCaCI.prod(produc[,1], cpi = expression(runif(1,120,180)))
#
#   With constant (120) CPI values:
#   bootBCaCI.prod(produc[,1], cpi = 120)
#
# Details:
#   If argument 'boot.replicates' is missing, the bootstrap replications are
#   generated by a call to bootReplicates.prod.
#   Argument 'prod.data' is always required (even if 'boot.replicates' is provided)
                          
bootBCaCI.prod <- function(prod.data, 
  nintervals = 10, sampl.area = 0.067, 
  cpi = expression(rnorm(1,mean=mean.cpi, sd=sigma.cpi)),
  min.cpi = 120,
  max.cpi = 180,
  mean.cpi = (min.cpi + max.cpi)/2,
  sigma.cpi = (max.cpi - mean.cpi) / qnorm(0.975),
  nb = 10000,
  boot.replicates,
  alphas = c(0.025, 0.025) 
)                              
{
  if (missing(boot.replicates)) {
    boot.replicates <- bootReplicates.prod(prod.data,
      nintervals, sampl.area, 
      cpi = cpi, mean.cpi = mean.cpi, sigma.cpi = sigma.cpi, nb = nb
    )
  }
  # Bootstrap BCa confidence intervals:
  # n jackknife replicates of yearProd:
  n <- length(prod.data)
  yearProd_i <- matrix(NA, nrow = 2, ncol = n)
  # Acceleration constant "a" estimate:
  for (i in 1:n) 
    yearProd_i[,i] <- yearProd(prod.data[-i], nintervals, sampl.area, mean.cpi) 
  yearProd_i <- matrix(rep(apply(yearProd_i, 1, mean), n), ncol = n) - yearProd_i
  a <- apply(yearProd_i^3, 1, sum) / (6 * apply(yearProd_i^2, 1, sum)^1.5)
  
  # Bias correction constant "z0" estimate:
  samplYearProd <- yearProd(prod.data, nintervals, sampl.area, mean.cpi)
  z0 <- qnorm(apply(boot.replicates <= matrix(rep(samplYearProd, nb), ncol = nb), 1, sum)  / nb)

  #limits <- apply(boot.replicates, 1, quantile, prob = c(alphas[1], 1 - alphas[2]))
  zalpha <- - qnorm(alphas)                          

  bcaAlphas <- cbind(
    pnorm(z0 + (z0 - zalpha) / (1 - a * (z0 - zalpha))), 
    pnorm(z0 + (z0 + zalpha) / (1 - a * (z0 + zalpha)))
  )
  
  limits <- matrix(NA, nrow = 2, ncol = 2)
  for (j in 1:2) {
    limits[,j] <- quantile(boot.replicates[j,], prob = bcaAlphas[j,])
  }
  colnames(limits) <- rownames(boot.replicates)
  return(limits)
}


# Graphical and numerical comparative summary of the bootstrap values 
# associated to two data sets
#
# Usage:
#   compareProds( data1, data2,
#     boot1, boot2, name1 = "First series", name2 = "Second series",
#     alphas = c(0.025, 0.025),                       
#     graphic = c("hist", "boxplot"),
#     confInt = c("percentile", "BCa")                 
#   )
#
# Arguments:
#   data1              First data vector
#   data2              Second data vector
#   boot1              A matrix with resampled production values. Its must have
#                      the same structure than the output of 'bootReplicates.prod'
#   boot2              A matrix with resampled production values. Its must have
#                      the same structure than the output of 'bootReplicates.prod'
#   name1              Character, name of the first dataset
#   name2              Character, name of the second dataset
#   alphas             The probabilities in the tails of the bootstrap 
#                      distribution. The confidence interval will have nominal
#                      1 - (alphas[1] + alphas[2]) confidence level
#   graphic            Name of the graphic to be displayed comparing 'boot1' and
#                      'boot2'. The possible values are "hist" (default) to
#                      plot the combined histogram or "boxplot" 
#   confInt            Type of bootstrap confidence interval to be computed.
#                      The possible values are "Percentile" (default) or "BCa"
#   plotRatio          Plot ratio P/B? Boolean value, defaults to FALSE
#
# Examples:
#   exemple1 <- read.table(file="prodCICLOPA1.txt", header=T)
#   exemple2 <- read.table(file="prodCICLOPA2.txt", header=T)
#
#   boot.prods1 <- bootReplicates.prod(exemple1[,1])
#   boot.prods2 <- bootReplicates.prod(exemple2[,1])
#
#   compareProds(boot1 = boot.prods1, boot2 = boot.prods2, name1 = "A1", name2 = "A2")                     
#   compareProds(exemple1, exemple2, boot.prods1, boot.prods2, "A1", "A2", 
#     graphic = "boxplot", confInt = "BCa")
#   
#
# Details:
#   For each set of bootstrap values, the percentile confidence interval and
#   the mean of all bootstrap values lying inside de confidence interval are
#   displayed. These numercial values are accompanied by a joint diagram 
#   consisting on joint histograms or boxplots for each series of bootstrap 
#   values (those lying inside the confidence intervals)

compareProds <- function(data1, data2, data3, data4, 
  boot1, boot2,  boot3, boot4,
  name1 = "First series", name2 = "Second series", name3 = "Third series", name4 = "Fourth series",   
  alphas = c(0.025, 0.025), 
  graphic = "hist",
  confInt = "Percentile", 
  plotRatio = FALSE, ...)  
{
  if (missing(boot1)) boot1 <- bootReplicates.prod(data1, ...)
  if (missing(boot2)) boot2 <- bootReplicates.prod(data2, ...)
  if (missing(boot3)) boot3 <- bootReplicates.prod(data3, ...)
  if (missing(boot4)) boot4 <- bootReplicates.prod(data4, ...)
  smmry1 <- summaryProd(data1, boot1, name1, alphas, confInt, ...)  
  smmry2 <- summaryProd(data2, boot2, name2, alphas, confInt, ...)
  smmry3 <- summaryProd(data3, boot3, name3, alphas, confInt, ...)
  smmry4 <- summaryProd(data4, boot4, name4, alphas, confInt, ...)
  varnams <- rownames(boot1)
  if (plotRatio)
    old.par <- par(mfrow = c(2,1))
  switch ( 
    graphic,
      hist = {
        groupedHist(list(smmry1[[2]][[1]], smmry2[[2]][[1]], smmry3[[2]][[1]], smmry4[[2]][[1]]),
          tittle = paste(varnams[1], name1,"vs",name2,"vs",name3,"vs",name4))
        if (plotRatio) 
          groupedHist(list(smmry1[[2]][[2]], smmry2[[2]][[2]], smmry3[[2]][[1]], smmry4[[2]][[1]]),
            tittle = paste(varnams[2], name1,"vs",name2,"vs",name3,"vs",name4))
      },
      boxplot = {
        boxplot(list(smmry1[[2]][[1]], smmry2[[2]][[1]], smmry3[[2]][[1]], smmry4[[2]][[1]]),
          names = paste(varnams[1], c(name1, name2, name3, name4)))
        if (plotRatio) 
          boxplot(list(smmry1[[2]][[2]], smmry2[[2]][[2]], smmry3[[2]][[1]], smmry4[[2]][[1]]),
            names = paste(varnams[2], c(name1, name2, name3, name4)))
      }
  )
  if (plotRatio)
    par(mfrow = old.par) 
}


# ******************************************************************************
#  AUXILIARY FUNCTIONS, NOT DESIGNED FOR DIRECT USE
# ******************************************************************************

centralPoints <- function(samplePoints, limits) {
  result <- vector("list", length = nrow(samplePoints))
  for (i in 1:nrow(samplePoints)) {
    result[[i]] <- samplePoints[i, (samplePoints[i,] > limits[1,i]) & (samplePoints[i,] < limits[2,i])]
  }
  names(result) <- rownames(samplePoints)
  result
}

groupedHist <- function(x, tittle, ...) 
{
  junk = NULL
  grouping = NULL
  for(i in 1:length(x)) {
    junk = c(junk,x[[i]])
    grouping <- c(grouping, rep(i,length(x[[i]]))) 
  }
  grouping <- factor(grouping)
  n.gr <- length(table(grouping))
  xr <- range(junk)
  histL <- tapply(junk, grouping, hist, plot = FALSE, ...)
  maxC <- max(sapply(lapply(histL, "[[", "counts"), max))
  h.den <- c(10, 15, 20)
  h.ang <- c(45, 15, -30) 
  plot(histL[[1]], xlim = xr, ylim= c(0, maxC), density = h.den[1], angle = h.ang[1], xlab = "x", main = tittle) 
  for(j in 2:n.gr) 
      plot(histL[[j]], add = TRUE, density = h.den[j], angle = h.ang[j], main = tittle) 
  invisible() 
}


summaryProd <- function(dataVec, boot.replicates, nam, alphas, confInt, ...)
{
  print(nam)
  switch(confInt,
    "Percentile" = {
      limits <- bootPercCI.prod(boot.replicates = boot.replicates, alphas = alphas, ...)
    },
    "BCa" = {
      limits <- bootBCaCI.prod(dataVec, boot.replicates = boot.replicates, alphas = alphas, ...)
    }
  )
  cat("Confidence intervals (", confInt, ")\n")
  print(limits)
  central <- centralPoints(boot.replicates, limits)
  print("Mean value of central (inside confidence interval) bootstrap replicates:")
  for (i in 1:nrow(boot.replicates)) {
    cat(rownames(boot.replicates)[i], "  ", mean(central[[i]]), "\n")
  }
  return(list(limits, central))
}