Segment analysis : going further on analysing the results, comparing fits of...

Segment analysis : going further on analysing the results, comparing fits of different datasets + quantifying the contribution of covariances

Segment analysis : going further on analysing the results, comparing fits of...
4769a3d7 · Cecilia, Helene · 53816b9b · 4769a3d7 · 4769a3d7 · 4769a3d7
Commit 4769a3d7 authored 4 years ago by Cecilia, Helene
--- a/Code/Rift_Segments_Functions.R
+++ b/Code/Rift_Segments_Functions.R
@@ -74,28 +74,21 @@ prob.by.comb <- function(results, pars){
    cov_ml * (-1)^both_ml
 }

-plot_fits_with_CI <- function(name){
+plot_fits_with_CI <- function(name, num.draws, scale){
  freq <- data_loading(name)
-  freq.df <- data.frame(comb = names(freq), obs = freq, type = "obs")
-  model <- readRDS(paste("../Output/segment/",str_replace(name," ","_"),"_model.RDS", sep=""))
-  COEFS <- rmvn(n = 1000, mu = model@coef, V = model@vcov)
-  fits.df <- NULL
-  for(i in seq(1,dim(COEFS)[1])){
-    coef <- COEFS[i,]
-    coef.convert <- c(1,coef[2:3]) * coef[1]
-    pars.best.fit = c(1/(1 + exp(-coef.convert)),coef[4:6])
-    fits = apply(mat, 1, prob.by.comb, pars = pars.best.fit)
-    fits = sum(freq) * fits
-    fits.df <- rbind(fits.df, data.frame(comb = names(freq), pred = fits))
+  if(scale == "rel"){
+    obs <- freq/sum(freq)
+  }else{
+    obs <- freq
  }
+  freq.df <- data.frame(comb = names(freq), obs = obs, type = "obs")
+  fits.df <- compute_CI(name, num.draws, scale, freq)
  fits.df$type <- "pred"
  # to keep order of the combination types (not in alphabetical order)
  freq.df$comb <- factor(freq.df$comb, levels = freq.df$comb)
  fits.df$comb <- factor(fits.df$comb, levels = freq.df$comb)
  g <- ggplot() + geom_col(data = freq.df, aes(x = comb, y = obs, fill = type)) +
    geom_violin(data = fits.df, aes(x = comb, y = pred, fill = type), alpha = 0.5) +
-    # scale_fill_manual(values = c(observations = "red",
-    #                              predictions = "cyan")) +
    labs(x = "", y = "Number of occurences", fill = "") + 
    ggtitle(name) +
    theme_bw() + 
@@ -107,6 +100,27 @@ plot_fits_with_CI <- function(name){
  return(g)
 }

+compute_CI <- function(name, num.draws, scale, freq){
+  model <- readRDS(paste("../Output/segment/",str_replace(name," ","_"),"_model.RDS", sep=""))
+  COEFS <- rmvn(n = num.draws, mu = model@coef, V = model@vcov)
+  fits.df <- NULL
+  for(i in seq(1,dim(COEFS)[1])){
+    coef <- COEFS[i,]
+    coef.convert <- c(1,coef[2:3]) * coef[1]
+    pars.best.fit <- c(1/(1 + exp(-coef.convert)),coef[4:6])
+    fits.rel <- apply(mat, 1, prob.by.comb, pars = pars.best.fit)
+    fits.abs <- sum(freq) * fits.rel
+    if(scale == "rel"){
+      pred <- fits.rel
+    }else{
+      pred <- fits.abs
+    }
+    fits.df <- rbind(fits.df, data.frame(comb = names(freq), pred = pred))
+  }
+  return(fits.df)
+}
+
+
 ##########################################
 ############ To clean ####################
 ##########################################

--- a/Code/Rift_Segments_Main.R
+++ b/Code/Rift_Segments_Main.R
@@ -9,6 +9,7 @@ library(prodlim)
 library(latex2exp)
 library(stringr) # for str_replace
 library(mgcv) # for rmvn
+library(dplyr)

 # Set Work Directory ------------------------------------------------------
 setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # set to source file location (if working in RStudio)
@@ -21,7 +22,7 @@ source('Rift_Segments_Functions.R')
 mat = as.matrix(expand.grid(t1 = c(0,1), t2 = c(0,1), t3 = c(0,1)))
 mat = mat[c(1,2,3,5,4,6,7,8),] #hard coded to match order of SegCounts

-# Generate results : uncomment if you don't have the output files already
+#--------- Generate results : uncomment if you don't have the output files already -----
 # for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
 #   model <- mle2(minuslogl = segmentsNLL,
 #                start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02),
@@ -32,7 +33,7 @@ mat = mat[c(1,2,3,5,4,6,7,8),] #hard coded to match order of SegCounts
 #   saveRDS(pars.best.fit, file = paste("../Output/segment/",str_replace(name," ","_"),"_param.RDS", sep=""))
 # }

-# Compute correlations using covariance parameters
+#------- Compute correlations using covariance parameters -----
 # for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
 #   print(name)
 #   model <- readRDS(paste("../Output/segment/",str_replace(name," ","_"),"_model.RDS", sep=""))
@@ -46,8 +47,70 @@ mat = mat[c(1,2,3,5,4,6,7,8),] #hard coded to match order of SegCounts
 #   print(corr)
 # }

+#--------- Plot fits, data, and confidence intervals  -----
 # Only works if you've saved the model files as intended
+# for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
+#  g <- plot_fits_with_CI(name, num.draws = 1000, scale = "rel") 
+#  plot(g)
+# }
+
+#---------- Compare model predictions ---------
+# agg.fits <- NULL
+# for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
+#   fit <- compute_CI(name, 1000, "rel", data_loading(name))
+#   fit$dataset <- name
+#   agg.fits <- rbind(agg.fits, fit)
+# }
+# # have a look at forcats package, fct_inorder(), reorder or relevel
+# agg.fits$comb <- factor(agg.fits$comb, levels = c("Empty","S","M","L","SM","SL","ML","SML"))
+# agg.fits <- agg.fits %>% group_by(dataset, comb) %>% summarise(max = max(pred),
+#                                                           min = min(pred))
+# 
+# ggplot(agg.fits) + geom_linerange(aes(x = comb,
+#                                     ymin = min, ymax = max, col = dataset),
+#                                   position = position_dodge(.5), size = 5, alpha = 0.7) +
+#   labs(y = "CI of predicted probability", x = "") +
+#   theme_bw() +
+#   guides(col = guide_legend(ncol=2)) +
+#   theme(axis.text.x = element_text(size = 19),
+#         axis.text.y = element_text(size = 20),
+#         axis.title.y = element_text(size = 22),
+#         legend.title = element_text(size = 19),
+#         legend.text = element_text(size = 19),
+#         panel.grid.minor.y = element_blank(),
+#         legend.position = "bottom")
+
 for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
- g <- plot_fits_with_CI(name)
- plot(g)
+  freq <- data_loading(name)
+  param.cov <- readRDS(paste("../Output/segment/",str_replace(name," ","_"),"_param.RDS", sep=""))
+  fits.rel.cov <- apply(mat, 1, prob.by.comb, pars = param.cov)
+  fits.abs.cov <- sum(freq) * fits.rel.cov
+  
+  param.no.cov <- c(param.cov[1:3],0,0,0)
+  fits.rel.no.cov <- apply(mat, 1, prob.by.comb, pars = param.no.cov)
+  fits.abs.no.cov <- sum(freq) * fits.rel.no.cov
+  
+  summary.df <- data.frame(type = names(freq), obs = freq, pred.no.cov = fits.abs.no.cov, pred.cov = fits.abs.cov)
+  
+  g <- ggplot(summary.df) + geom_col(aes(x = type, y = obs), fill = rgb(0.2,0.4,0.6,0.6)) +
+    geom_point(aes(x = type, y = pred.no.cov, fill = "No covariance", shape = "No covariance"), col = "black", size = 2, alpha = 0.6) +
+    geom_point(aes(x = type, y = pred.cov, fill = "Covariance", shape = "Covariance"), col = "black", size = 2, alpha = 0.6) + 
+    labs(y = "Number of occurences", x = "") +
+    theme_bw() +
+    scale_fill_manual(name = "Model prediction:",
+                      labels = c("Covariance","No covariance"),
+                      values = c("green", "red")) + 
+    scale_shape_manual(name = "Model prediction:",
+                       labels = c("Covariance","No covariance"),
+                       values = c(21, 25)) +
+    guides(col = guide_legend(ncol=2)) +
+    ggtitle(name) +
+    theme(axis.text.x = element_text(size = 19),
+          axis.text.y = element_text(size = 20),
+          axis.title.y = element_text(size = 22),
+          legend.title = element_text(size = 19),
+          legend.text = element_text(size = 19),
+          legend.position = "bottom",
+          plot.title = element_text(size = 22))
+  plot(g)
 }
--- a/Output/segment/RVFV_progeny_cov_impact.png
+++ b/Output/segment/RVFV_progeny_cov_impact.png
--- a/Output/segment/RVFV_stock_ins_cov_impact.png
+++ b/Output/segment/RVFV_stock_ins_cov_impact.png
--- a/Output/segment/RVFV_stock_mam_cov_impact.png
+++ b/Output/segment/RVFV_stock_mam_cov_impact.png
--- a/Output/segment/SBV_progeny_cov_impact.png
+++ b/Output/segment/SBV_progeny_cov_impact.png
--- a/Output/segment/compare_predictions.png
+++ b/Output/segment/compare_predictions.png