Skip to content
Snippets Groups Projects
Commit 4769a3d7 authored by Cecilia, Helene's avatar Cecilia, Helene
Browse files

Segment analysis : going further on analysing the results, comparing fits of...

Segment analysis : going further on analysing the results, comparing fits of different datasets + quantifying the contribution of covariances
parent 53816b9b
Branches
No related tags found
No related merge requests found
......@@ -74,28 +74,21 @@ prob.by.comb <- function(results, pars){
cov_ml * (-1)^both_ml
}
plot_fits_with_CI <- function(name){
plot_fits_with_CI <- function(name, num.draws, scale){
freq <- data_loading(name)
freq.df <- data.frame(comb = names(freq), obs = freq, type = "obs")
model <- readRDS(paste("../Output/segment/",str_replace(name," ","_"),"_model.RDS", sep=""))
COEFS <- rmvn(n = 1000, mu = model@coef, V = model@vcov)
fits.df <- NULL
for(i in seq(1,dim(COEFS)[1])){
coef <- COEFS[i,]
coef.convert <- c(1,coef[2:3]) * coef[1]
pars.best.fit = c(1/(1 + exp(-coef.convert)),coef[4:6])
fits = apply(mat, 1, prob.by.comb, pars = pars.best.fit)
fits = sum(freq) * fits
fits.df <- rbind(fits.df, data.frame(comb = names(freq), pred = fits))
if(scale == "rel"){
obs <- freq/sum(freq)
}else{
obs <- freq
}
freq.df <- data.frame(comb = names(freq), obs = obs, type = "obs")
fits.df <- compute_CI(name, num.draws, scale, freq)
fits.df$type <- "pred"
# to keep order of the combination types (not in alphabetical order)
freq.df$comb <- factor(freq.df$comb, levels = freq.df$comb)
fits.df$comb <- factor(fits.df$comb, levels = freq.df$comb)
g <- ggplot() + geom_col(data = freq.df, aes(x = comb, y = obs, fill = type)) +
geom_violin(data = fits.df, aes(x = comb, y = pred, fill = type), alpha = 0.5) +
# scale_fill_manual(values = c(observations = "red",
# predictions = "cyan")) +
labs(x = "", y = "Number of occurences", fill = "") +
ggtitle(name) +
theme_bw() +
......@@ -107,6 +100,27 @@ plot_fits_with_CI <- function(name){
return(g)
}
compute_CI <- function(name, num.draws, scale, freq){
model <- readRDS(paste("../Output/segment/",str_replace(name," ","_"),"_model.RDS", sep=""))
COEFS <- rmvn(n = num.draws, mu = model@coef, V = model@vcov)
fits.df <- NULL
for(i in seq(1,dim(COEFS)[1])){
coef <- COEFS[i,]
coef.convert <- c(1,coef[2:3]) * coef[1]
pars.best.fit <- c(1/(1 + exp(-coef.convert)),coef[4:6])
fits.rel <- apply(mat, 1, prob.by.comb, pars = pars.best.fit)
fits.abs <- sum(freq) * fits.rel
if(scale == "rel"){
pred <- fits.rel
}else{
pred <- fits.abs
}
fits.df <- rbind(fits.df, data.frame(comb = names(freq), pred = pred))
}
return(fits.df)
}
##########################################
############ To clean ####################
##########################################
......
......@@ -9,6 +9,7 @@ library(prodlim)
library(latex2exp)
library(stringr) # for str_replace
library(mgcv) # for rmvn
library(dplyr)
# Set Work Directory ------------------------------------------------------
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # set to source file location (if working in RStudio)
......@@ -21,7 +22,7 @@ source('Rift_Segments_Functions.R')
mat = as.matrix(expand.grid(t1 = c(0,1), t2 = c(0,1), t3 = c(0,1)))
mat = mat[c(1,2,3,5,4,6,7,8),] #hard coded to match order of SegCounts
# Generate results : uncomment if you don't have the output files already
#--------- Generate results : uncomment if you don't have the output files already -----
# for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
# model <- mle2(minuslogl = segmentsNLL,
# start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02),
......@@ -32,7 +33,7 @@ mat = mat[c(1,2,3,5,4,6,7,8),] #hard coded to match order of SegCounts
# saveRDS(pars.best.fit, file = paste("../Output/segment/",str_replace(name," ","_"),"_param.RDS", sep=""))
# }
# Compute correlations using covariance parameters
#------- Compute correlations using covariance parameters -----
# for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
# print(name)
# model <- readRDS(paste("../Output/segment/",str_replace(name," ","_"),"_model.RDS", sep=""))
......@@ -46,8 +47,70 @@ mat = mat[c(1,2,3,5,4,6,7,8),] #hard coded to match order of SegCounts
# print(corr)
# }
#--------- Plot fits, data, and confidence intervals -----
# Only works if you've saved the model files as intended
# for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
# g <- plot_fits_with_CI(name, num.draws = 1000, scale = "rel")
# plot(g)
# }
#---------- Compare model predictions ---------
# agg.fits <- NULL
# for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
# fit <- compute_CI(name, 1000, "rel", data_loading(name))
# fit$dataset <- name
# agg.fits <- rbind(agg.fits, fit)
# }
# # have a look at forcats package, fct_inorder(), reorder or relevel
# agg.fits$comb <- factor(agg.fits$comb, levels = c("Empty","S","M","L","SM","SL","ML","SML"))
# agg.fits <- agg.fits %>% group_by(dataset, comb) %>% summarise(max = max(pred),
# min = min(pred))
#
# ggplot(agg.fits) + geom_linerange(aes(x = comb,
# ymin = min, ymax = max, col = dataset),
# position = position_dodge(.5), size = 5, alpha = 0.7) +
# labs(y = "CI of predicted probability", x = "") +
# theme_bw() +
# guides(col = guide_legend(ncol=2)) +
# theme(axis.text.x = element_text(size = 19),
# axis.text.y = element_text(size = 20),
# axis.title.y = element_text(size = 22),
# legend.title = element_text(size = 19),
# legend.text = element_text(size = 19),
# panel.grid.minor.y = element_blank(),
# legend.position = "bottom")
for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
g <- plot_fits_with_CI(name)
plot(g)
freq <- data_loading(name)
param.cov <- readRDS(paste("../Output/segment/",str_replace(name," ","_"),"_param.RDS", sep=""))
fits.rel.cov <- apply(mat, 1, prob.by.comb, pars = param.cov)
fits.abs.cov <- sum(freq) * fits.rel.cov
param.no.cov <- c(param.cov[1:3],0,0,0)
fits.rel.no.cov <- apply(mat, 1, prob.by.comb, pars = param.no.cov)
fits.abs.no.cov <- sum(freq) * fits.rel.no.cov
summary.df <- data.frame(type = names(freq), obs = freq, pred.no.cov = fits.abs.no.cov, pred.cov = fits.abs.cov)
g <- ggplot(summary.df) + geom_col(aes(x = type, y = obs), fill = rgb(0.2,0.4,0.6,0.6)) +
geom_point(aes(x = type, y = pred.no.cov, fill = "No covariance", shape = "No covariance"), col = "black", size = 2, alpha = 0.6) +
geom_point(aes(x = type, y = pred.cov, fill = "Covariance", shape = "Covariance"), col = "black", size = 2, alpha = 0.6) +
labs(y = "Number of occurences", x = "") +
theme_bw() +
scale_fill_manual(name = "Model prediction:",
labels = c("Covariance","No covariance"),
values = c("green", "red")) +
scale_shape_manual(name = "Model prediction:",
labels = c("Covariance","No covariance"),
values = c(21, 25)) +
guides(col = guide_legend(ncol=2)) +
ggtitle(name) +
theme(axis.text.x = element_text(size = 19),
axis.text.y = element_text(size = 20),
axis.title.y = element_text(size = 22),
legend.title = element_text(size = 19),
legend.text = element_text(size = 19),
legend.position = "bottom",
plot.title = element_text(size = 22))
plot(g)
}
Output/segment/RVFV_progeny_cov_impact.png

30.7 KiB

Output/segment/RVFV_stock_ins_cov_impact.png

32.5 KiB

Output/segment/RVFV_stock_mam_cov_impact.png

31.7 KiB

Output/segment/SBV_progeny_cov_impact.png

32.1 KiB

Output/segment/compare_predictions.png

34.3 KiB

0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment