Segment analysis : corrected confidence interval function (sample_from_CI) +...

Segment analysis : corrected confidence interval function (sample_from_CI) + compute CI of contribution of covariance (~ non randomness of packaging efficiency) segmentNLL.nested function to test nested models segmentNLL for the model with different p and covariances (the one retained for the analysis)

Segment analysis : corrected confidence interval function (sample_from_CI) +...
579ac77d · Cecilia, Helene · 7a7abb3e · 579ac77d · 579ac77d · 579ac77d
Commit 579ac77d authored 4 years ago by Cecilia, Helene
--- a/Code/Rift_Segments_Functions.R
+++ b/Code/Rift_Segments_Functions.R
@@ -35,7 +35,7 @@ data_loading <- function(dataset.name){
  }

 # Likelihood functions  -------------------------------------------------------
-segmentsNLL = function(params, mat, freq){ 
+segmentsNLL.nested = function(params, mat, freq){ 
  p_s <- params[1]
  p_m <- params[2] * p_s
  p_l <- params[3] * p_s
@@ -55,6 +55,29 @@ segmentsNLL = function(params, mat, freq){
  }
  -sum(freq * log(Prob))
 }
+
+# Pm and pl are no longer defined as multiplicative coefficients of ps
+segmentsNLL = function(params, mat, freq){ 
+  p_s <- params[1]
+  p_m <- params[2]
+  p_l <- params[3]
+  
+  p_s <- 1/(1+exp(-p_s))
+  p_m <- 1/(1+exp(-p_m))
+  p_l <- 1/(1+exp(-p_l))
+  
+  cov_sm <- params[4]
+  cov_sl <- params[5]
+  cov_ml <- params[6]
+  
+  Prob = numeric(); pars = c(p_s, p_m, p_l, cov_sm, cov_sl, cov_ml)
+  for (ii in 1:nrow(mat)){
+    results = mat[ii,]
+    Prob = c(Prob, prob.by.comb(results, pars) ) # tester ici si sum Prob == 1
+  }
+  -sum(freq * log(Prob))
+}
+
 parnames(segmentsNLL) <- c("p_s", "p_m", "p_l", "cov_sm", "cov_sl", "cov_ml")

 prob.by.comb <- function(results, pars){
@@ -74,48 +97,53 @@ prob.by.comb <- function(results, pars){
    cov_ml * (-1)^both_ml
 }

-plot_fits_with_CI <- function(name, num.draws, scale){
-  freq <- data_loading(name)
-  if(scale == "rel"){
-    obs <- freq/sum(freq)
-  }else{
-    obs <- freq
-  }
-  freq.df <- data.frame(comb = names(freq), obs = obs, type = "obs")
-  fits.df <- compute_CI(name, num.draws, scale, freq)
-  fits.df$type <- "pred"
-  # to keep order of the combination types (not in alphabetical order)
-  freq.df$comb <- factor(freq.df$comb, levels = freq.df$comb)
-  fits.df$comb <- factor(fits.df$comb, levels = freq.df$comb)
-  g <- ggplot() + geom_col(data = freq.df, aes(x = comb, y = obs, fill = type)) +
-    geom_violin(data = fits.df, aes(x = comb, y = pred, fill = type), alpha = 0.5) +
-    labs(x = "", y = "Number of occurences", fill = "") + 
-    ggtitle(name) +
-    theme_bw() + 
-    theme(axis.text.x = element_text(size = 21),
-          axis.text.y = element_text(size = 20),
-          axis.title.y = element_text(size = 22),
-          legend.text = element_text(size = 22),
-          plot.title = element_text(size = 23))
-  return(g)
-}
+# plot_fits_with_CI <- function(name, num.draws, scale){ # add the 95% interval, probably stop using violin also?
+#   freq <- data_loading(name)
+#   if(scale == "rel"){
+#     obs <- freq/sum(freq)
+#   }else{
+#     obs <- freq
+#   }
+#   freq.df <- data.frame(comb = names(freq), obs = obs, type = "obs")
+#   fits.df <- sample_from_CI(name, num.draws, scale, freq)
+#   fits.df$type <- "pred"
+#   # to keep order of the combination types (not in alphabetical order)
+#   freq.df$comb <- factor(freq.df$comb, levels = freq.df$comb)
+#   fits.df$comb <- factor(fits.df$comb, levels = freq.df$comb)
+#   g <- ggplot() + geom_col(data = freq.df, aes(x = comb, y = obs, fill = type)) +
+#     geom_violin(data = fits.df, aes(x = comb, y = pred, fill = type), alpha = 0.5) +
+#     labs(x = "", y = "Number of occurences", fill = "") + 
+#     ggtitle(name) +
+#     theme_bw() + 
+#     theme(axis.text.x = element_text(size = 21),
+#           axis.text.y = element_text(size = 20),
+#           axis.title.y = element_text(size = 22),
+#           legend.text = element_text(size = 22),
+#           plot.title = element_text(size = 23))
+#   return(g)
+# }

-compute_CI <- function(name, num.draws, scale, freq){
-  model <- readRDS(paste("../Output/segment/",str_replace(name," ","_"),"_model.RDS", sep=""))
+sample_from_CI <- function(name, num.draws, scale, freq){
+  model <- readRDS(paste("../Output/segment/",gsub(" ","_",name),"_model.RDS", sep=""))
  COEFS <- rmvn(n = num.draws, mu = model@coef, V = model@vcov)
  fits.df <- NULL
  for(i in seq(1,dim(COEFS)[1])){
    coef <- COEFS[i,]
-    coef.convert <- c(1,coef[2:3]) * coef[1]
+    coef.convert <- coef[1:3] #c(1,coef[2:3]) * coef[1]
    pars.best.fit <- c(1/(1 + exp(-coef.convert)),coef[4:6])
    fits.rel <- apply(mat, 1, prob.by.comb, pars = pars.best.fit)
+    names(fits.rel) <- names(freq)
    fits.abs <- sum(freq) * fits.rel
    if(scale == "rel"){
      pred <- fits.rel
    }else{
      pred <- fits.abs
    }
-    fits.df <- rbind(fits.df, data.frame(comb = names(freq), pred = pred))
+    pars.no.cov <- c(1/(1 + exp(-coef.convert)),0,0,0)
+    fit.no.cov <- apply(mat, 1, prob.by.comb, pars = pars.no.cov)
+    names(fit.no.cov) <- names(freq)
+    cov.contrib <- (fits.rel[["SML"]] - fit.no.cov[["SML"]])/fits.rel[["SML"]]
+    fits.df <- rbind(fits.df, data.frame(comb = names(freq), pred = pred, expl.effic = rep(cov.contrib, length(pred))))
  }
  return(fits.df)
 }

--- a/Code/Rift_Segments_Main.R
+++ b/Code/Rift_Segments_Main.R
@@ -24,31 +24,44 @@ mat = mat[c(1,2,3,5,4,6,7,8),] #hard coded to match order of SegCounts

 #--------- Generate results : uncomment if you don't have the output files already -----
 # for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
+#   print(name)
 #   model <- mle2(minuslogl = segmentsNLL,
 #                start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02),
 #                data = list(mat = mat, freq = data_loading(name)))
-#   coef <- c(1,model@coef[2:3]) * model@coef[1]
+#   # print(summary(model))
+#   coef <- model@coef[1:3] #c(1,model@coef[2:3]) * model@coef[1]
 #   pars.best.fit = c(1/(1 + exp(-coef)),model@coef[4:6])
+#   # print(pars.best.fit)
+#   # Prob = numeric() # test sum of all combination probabilities is equal to 1 : OK
+#   # for (ii in 1:nrow(mat)){
+#   #   results = mat[ii,]
+#   #   Prob = c(Prob, prob.by.comb(results, pars.best.fit) ) 
+#   # }
+#   # print(sum(Prob))
+#   # ci <- confint(model)
+#   # print(ci)
+#   # print(1/(1+exp(-ci[1:3,])))
 #   saveRDS(model, file = paste("../Output/segment/",gsub(" ","_",name),"_model.RDS", sep=""))
 #   saveRDS(pars.best.fit, file = paste("../Output/segment/",gsub(" ","_",name),"_param.RDS", sep=""))
 # }

-stock.ins <- readRDS("../Output/segment/RVFV_stock_insect_model.RDS")
-stock.mam <- readRDS("../Output/segment/RVFV_stock_mammalian_model.RDS")
-merge.stock.ll <- logLik(stock.ins) + logLik(stock.mam) #12 param
-
-stock.ins.obs <- data_loading("RVFV stock insect")
-stock.mam.obs <- data_loading("RVFV stock mammalian")
-sum.stock.obs <- stock.ins.obs + stock.mam.obs
-model.sum.stock <- mle2(minuslogl = segmentsNLL,
-              start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02),
-              data = list(mat = mat, freq = sum.stock.obs))
-
-stat <- 2*(merge.stock.ll - logLik(model.sum.stock)) # 1150.84
-pchisq(stat, df=6, lower.tail=FALSE) # 2.486916e-245
+#--------- Justify the use of species-specific parameters ------
+# stock.ins <- readRDS("../Output/segment/RVFV_stock_insect_model.RDS")
+# stock.mam <- readRDS("../Output/segment/RVFV_stock_mammalian_model.RDS")
+# merge.stock.ll <- logLik(stock.ins) + logLik(stock.mam) #12 param
+# 
+# stock.ins.obs <- data_loading("RVFV stock insect")
+# stock.mam.obs <- data_loading("RVFV stock mammalian")
+# sum.stock.obs <- stock.ins.obs + stock.mam.obs
+# model.sum.stock <- mle2(minuslogl = segmentsNLL,
+#               start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02),
+#               data = list(mat = mat, freq = sum.stock.obs))
+# 
+# stat <- 2*(merge.stock.ll - logLik(model.sum.stock)) # 1150.84
+# pchisq(stat, df=6, lower.tail=FALSE) # 2.486916e-245

 #------- Compute correlations using covariance parameters -----
-# Not sure this formula (from Menten et al.) si directly transferable to our case
+# Not sure this formula (from Menten et al.) is directly transferable to our case
 # for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
 #   print(name)
 #   model <- readRDS(paste("../Output/segment/",gsub(" ","_",name),"_model.RDS", sep=""))
@@ -69,34 +82,44 @@ pchisq(stat, df=6, lower.tail=FALSE) # 2.486916e-245
 #  plot(g)
 # }

+
 #---------- Compare model predictions (CI) ---------
-# agg.fits <- NULL
-# for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
-#   fit <- compute_CI(name, 1000, "rel", data_loading(name))
-#   fit$dataset <- name
-#   agg.fits <- rbind(agg.fits, fit)
-# }
-# # have a look at forcats package, fct_inorder(), reorder or relevel
-# agg.fits$comb <- factor(agg.fits$comb, levels = c("Empty","S","M","L","SM","SL","ML","SML"))
+agg.fits <- NULL
+for(name in c("RVFV progeny","SBV progeny")){ 
+  fit <- sample_from_CI(name, 1000, "rel", data_loading(name))
+  fit <- fit %>% group_by(comb) %>% summarise(ci_low = quantile(pred, 0.025),
+                                       ci_high = quantile(pred, 0.975))
+  fit$dataset <- name
+  agg.fits <- rbind(agg.fits, fit)
+}
+# have a look at forcats package, fct_inorder(), reorder or relevel
+agg.fits$comb <- factor(agg.fits$comb, levels = c("Empty","S","M","L","SM","SL","ML","SML"))
 # agg.fits <- agg.fits %>% group_by(dataset, comb) %>% summarise(max = max(pred),
 #                                                           min = min(pred))
-# 
-# ggplot(agg.fits) + geom_linerange(aes(x = comb,
-#                                     ymin = min, ymax = max, col = dataset),
-#                                   position = position_dodge(.5), size = 5, alpha = 0.7) +
-#   labs(y = "CI of predicted probability", x = "") +
-#   theme_bw() +
-#   guides(col = guide_legend(ncol=2)) +
-#   theme(axis.text.x = element_text(size = 19),
-#         axis.text.y = element_text(size = 20),
-#         axis.title.y = element_text(size = 22),
-#         legend.title = element_text(size = 19),
-#         legend.text = element_text(size = 19),
-#         panel.grid.minor.y = element_blank(),
-#         legend.position = "bottom")
+
+ggplot(agg.fits) + geom_linerange(aes(x = comb,
+                                    ymin = ci_low, ymax = ci_high, col = dataset),
+                                  position = position_dodge(.5), size = 5, alpha = 0.7) +
+  labs(y = "CI of predicted probability", x = "") +
+  theme_bw() +
+  guides(col = guide_legend(ncol=2)) +
+  theme(axis.text.x = element_text(size = 19),
+        axis.text.y = element_text(size = 20),
+        axis.title.y = element_text(size = 22),
+        legend.title = element_text(size = 19),
+        legend.text = element_text(size = 19),
+        panel.grid.minor.y = element_blank(),
+        legend.position = "bottom")


 # ------- Quantify and plot impact of covariance ---------
+for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
+  fit <- sample_from_CI(name,1000,"rel",data_loading(name))
+  print(name)
+  print(quantile(fit$expl.effic, 0.025))
+  print(quantile(fit$expl.effic, 0.975))
+}
+
 for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
  freq <- data_loading(name)
  param.cov <- readRDS(paste("../Output/segment/",gsub(" ","_",name),"_param.RDS", sep=""))

--- a/Output/segment/RVFV_progeny_model.RDS
+++ b/Output/segment/RVFV_progeny_model.RDS
--- a/Output/segment/RVFV_progeny_param.RDS
+++ b/Output/segment/RVFV_progeny_param.RDS
--- a/Output/segment/RVFV_stock_insect_model.RDS
+++ b/Output/segment/RVFV_stock_insect_model.RDS
--- a/Output/segment/RVFV_stock_insect_param.RDS
+++ b/Output/segment/RVFV_stock_insect_param.RDS
--- a/Output/segment/RVFV_stock_mammalian_model.RDS
+++ b/Output/segment/RVFV_stock_mammalian_model.RDS
--- a/Output/segment/RVFV_stock_mammalian_param.RDS
+++ b/Output/segment/RVFV_stock_mammalian_param.RDS
--- a/Output/segment/SBV_progeny_model.RDS
+++ b/Output/segment/SBV_progeny_model.RDS
--- a/Output/segment/SBV_progeny_param.RDS
+++ b/Output/segment/SBV_progeny_param.RDS
--- a/Output/segment/compare_predictions_progeny_RVFV_SBV.png
+++ b/Output/segment/compare_predictions_progeny_RVFV_SBV.png
--- a/Output/segment/compare_predictions_stock_mam_insect.png
+++ b/Output/segment/compare_predictions_stock_mam_insect.png