diff --git a/Code/Rift_Segments_Functions.R b/Code/Rift_Segments_Functions.R index 373c60ef19deeeb450afffce40f9df5f083210e8..9973de9719ccd778716ffd9fc5ff7a9aa5a8e3a1 100644 --- a/Code/Rift_Segments_Functions.R +++ b/Code/Rift_Segments_Functions.R @@ -74,28 +74,21 @@ prob.by.comb <- function(results, pars){ cov_ml * (-1)^both_ml } -plot_fits_with_CI <- function(name){ +plot_fits_with_CI <- function(name, num.draws, scale){ freq <- data_loading(name) - freq.df <- data.frame(comb = names(freq), obs = freq, type = "obs") - model <- readRDS(paste("../Output/segment/",str_replace(name," ","_"),"_model.RDS", sep="")) - COEFS <- rmvn(n = 1000, mu = model@coef, V = model@vcov) - fits.df <- NULL - for(i in seq(1,dim(COEFS)[1])){ - coef <- COEFS[i,] - coef.convert <- c(1,coef[2:3]) * coef[1] - pars.best.fit = c(1/(1 + exp(-coef.convert)),coef[4:6]) - fits = apply(mat, 1, prob.by.comb, pars = pars.best.fit) - fits = sum(freq) * fits - fits.df <- rbind(fits.df, data.frame(comb = names(freq), pred = fits)) + if(scale == "rel"){ + obs <- freq/sum(freq) + }else{ + obs <- freq } + freq.df <- data.frame(comb = names(freq), obs = obs, type = "obs") + fits.df <- compute_CI(name, num.draws, scale, freq) fits.df$type <- "pred" # to keep order of the combination types (not in alphabetical order) freq.df$comb <- factor(freq.df$comb, levels = freq.df$comb) fits.df$comb <- factor(fits.df$comb, levels = freq.df$comb) g <- ggplot() + geom_col(data = freq.df, aes(x = comb, y = obs, fill = type)) + geom_violin(data = fits.df, aes(x = comb, y = pred, fill = type), alpha = 0.5) + - # scale_fill_manual(values = c(observations = "red", - # predictions = "cyan")) + labs(x = "", y = "Number of occurences", fill = "") + ggtitle(name) + theme_bw() + @@ -107,6 +100,27 @@ plot_fits_with_CI <- function(name){ return(g) } +compute_CI <- function(name, num.draws, scale, freq){ + model <- readRDS(paste("../Output/segment/",str_replace(name," ","_"),"_model.RDS", sep="")) + COEFS <- rmvn(n = num.draws, mu = model@coef, V = model@vcov) + fits.df <- NULL + for(i in seq(1,dim(COEFS)[1])){ + coef <- COEFS[i,] + coef.convert <- c(1,coef[2:3]) * coef[1] + pars.best.fit <- c(1/(1 + exp(-coef.convert)),coef[4:6]) + fits.rel <- apply(mat, 1, prob.by.comb, pars = pars.best.fit) + fits.abs <- sum(freq) * fits.rel + if(scale == "rel"){ + pred <- fits.rel + }else{ + pred <- fits.abs + } + fits.df <- rbind(fits.df, data.frame(comb = names(freq), pred = pred)) + } + return(fits.df) +} + + ########################################## ############ To clean #################### ########################################## diff --git a/Code/Rift_Segments_Main.R b/Code/Rift_Segments_Main.R index 392d1143ef9777d3b90d85a9a0c96a3527378f71..7466062aee0b677746d5678b6cf8a2d07eb07860 100644 --- a/Code/Rift_Segments_Main.R +++ b/Code/Rift_Segments_Main.R @@ -9,6 +9,7 @@ library(prodlim) library(latex2exp) library(stringr) # for str_replace library(mgcv) # for rmvn +library(dplyr) # Set Work Directory ------------------------------------------------------ setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # set to source file location (if working in RStudio) @@ -21,7 +22,7 @@ source('Rift_Segments_Functions.R') mat = as.matrix(expand.grid(t1 = c(0,1), t2 = c(0,1), t3 = c(0,1))) mat = mat[c(1,2,3,5,4,6,7,8),] #hard coded to match order of SegCounts -# Generate results : uncomment if you don't have the output files already +#--------- Generate results : uncomment if you don't have the output files already ----- # for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){ # model <- mle2(minuslogl = segmentsNLL, # start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02), @@ -32,7 +33,7 @@ mat = mat[c(1,2,3,5,4,6,7,8),] #hard coded to match order of SegCounts # saveRDS(pars.best.fit, file = paste("../Output/segment/",str_replace(name," ","_"),"_param.RDS", sep="")) # } -# Compute correlations using covariance parameters +#------- Compute correlations using covariance parameters ----- # for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){ # print(name) # model <- readRDS(paste("../Output/segment/",str_replace(name," ","_"),"_model.RDS", sep="")) @@ -46,8 +47,70 @@ mat = mat[c(1,2,3,5,4,6,7,8),] #hard coded to match order of SegCounts # print(corr) # } +#--------- Plot fits, data, and confidence intervals ----- # Only works if you've saved the model files as intended +# for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){ +# g <- plot_fits_with_CI(name, num.draws = 1000, scale = "rel") +# plot(g) +# } + +#---------- Compare model predictions --------- +# agg.fits <- NULL +# for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){ +# fit <- compute_CI(name, 1000, "rel", data_loading(name)) +# fit$dataset <- name +# agg.fits <- rbind(agg.fits, fit) +# } +# # have a look at forcats package, fct_inorder(), reorder or relevel +# agg.fits$comb <- factor(agg.fits$comb, levels = c("Empty","S","M","L","SM","SL","ML","SML")) +# agg.fits <- agg.fits %>% group_by(dataset, comb) %>% summarise(max = max(pred), +# min = min(pred)) +# +# ggplot(agg.fits) + geom_linerange(aes(x = comb, +# ymin = min, ymax = max, col = dataset), +# position = position_dodge(.5), size = 5, alpha = 0.7) + +# labs(y = "CI of predicted probability", x = "") + +# theme_bw() + +# guides(col = guide_legend(ncol=2)) + +# theme(axis.text.x = element_text(size = 19), +# axis.text.y = element_text(size = 20), +# axis.title.y = element_text(size = 22), +# legend.title = element_text(size = 19), +# legend.text = element_text(size = 19), +# panel.grid.minor.y = element_blank(), +# legend.position = "bottom") + for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){ - g <- plot_fits_with_CI(name) - plot(g) + freq <- data_loading(name) + param.cov <- readRDS(paste("../Output/segment/",str_replace(name," ","_"),"_param.RDS", sep="")) + fits.rel.cov <- apply(mat, 1, prob.by.comb, pars = param.cov) + fits.abs.cov <- sum(freq) * fits.rel.cov + + param.no.cov <- c(param.cov[1:3],0,0,0) + fits.rel.no.cov <- apply(mat, 1, prob.by.comb, pars = param.no.cov) + fits.abs.no.cov <- sum(freq) * fits.rel.no.cov + + summary.df <- data.frame(type = names(freq), obs = freq, pred.no.cov = fits.abs.no.cov, pred.cov = fits.abs.cov) + + g <- ggplot(summary.df) + geom_col(aes(x = type, y = obs), fill = rgb(0.2,0.4,0.6,0.6)) + + geom_point(aes(x = type, y = pred.no.cov, fill = "No covariance", shape = "No covariance"), col = "black", size = 2, alpha = 0.6) + + geom_point(aes(x = type, y = pred.cov, fill = "Covariance", shape = "Covariance"), col = "black", size = 2, alpha = 0.6) + + labs(y = "Number of occurences", x = "") + + theme_bw() + + scale_fill_manual(name = "Model prediction:", + labels = c("Covariance","No covariance"), + values = c("green", "red")) + + scale_shape_manual(name = "Model prediction:", + labels = c("Covariance","No covariance"), + values = c(21, 25)) + + guides(col = guide_legend(ncol=2)) + + ggtitle(name) + + theme(axis.text.x = element_text(size = 19), + axis.text.y = element_text(size = 20), + axis.title.y = element_text(size = 22), + legend.title = element_text(size = 19), + legend.text = element_text(size = 19), + legend.position = "bottom", + plot.title = element_text(size = 22)) + plot(g) } diff --git a/Output/segment/RVFV_progeny_cov_impact.png b/Output/segment/RVFV_progeny_cov_impact.png new file mode 100644 index 0000000000000000000000000000000000000000..53a05a79c255bb8a9919ec1e2b2359045f2c740a Binary files /dev/null and b/Output/segment/RVFV_progeny_cov_impact.png differ diff --git a/Output/segment/RVFV_stock_ins_cov_impact.png b/Output/segment/RVFV_stock_ins_cov_impact.png new file mode 100644 index 0000000000000000000000000000000000000000..349103566df997102982b43e116826d3e88f2619 Binary files /dev/null and b/Output/segment/RVFV_stock_ins_cov_impact.png differ diff --git a/Output/segment/RVFV_stock_mam_cov_impact.png b/Output/segment/RVFV_stock_mam_cov_impact.png new file mode 100644 index 0000000000000000000000000000000000000000..987f816c3f5a7509ad73df26170fd55376b1e315 Binary files /dev/null and b/Output/segment/RVFV_stock_mam_cov_impact.png differ diff --git a/Output/segment/SBV_progeny_cov_impact.png b/Output/segment/SBV_progeny_cov_impact.png new file mode 100644 index 0000000000000000000000000000000000000000..4325ab9319f78a98944e1d8c5a924de50d3e4414 Binary files /dev/null and b/Output/segment/SBV_progeny_cov_impact.png differ diff --git a/Output/segment/compare_predictions.png b/Output/segment/compare_predictions.png new file mode 100644 index 0000000000000000000000000000000000000000..84991265fbd0ef3e46d5efb9a07c86174b7ca8a8 Binary files /dev/null and b/Output/segment/compare_predictions.png differ