Skip to content
Snippets Groups Projects
Commit 579ac77d authored by Cecilia, Helene's avatar Cecilia, Helene
Browse files

Segment analysis : corrected confidence interval function (sample_from_CI) +...

Segment analysis : corrected confidence interval function (sample_from_CI) + compute CI of contribution of covariance (~ non randomness of packaging efficiency)
segmentNLL.nested function to test nested models
segmentNLL for the model with different p and covariances (the one retained for the analysis)
parent 7a7abb3e
Branches
No related tags found
No related merge requests found
......@@ -35,7 +35,7 @@ data_loading <- function(dataset.name){
}
# Likelihood functions -------------------------------------------------------
segmentsNLL = function(params, mat, freq){
segmentsNLL.nested = function(params, mat, freq){
p_s <- params[1]
p_m <- params[2] * p_s
p_l <- params[3] * p_s
......@@ -55,6 +55,29 @@ segmentsNLL = function(params, mat, freq){
}
-sum(freq * log(Prob))
}
# Pm and pl are no longer defined as multiplicative coefficients of ps
segmentsNLL = function(params, mat, freq){
p_s <- params[1]
p_m <- params[2]
p_l <- params[3]
p_s <- 1/(1+exp(-p_s))
p_m <- 1/(1+exp(-p_m))
p_l <- 1/(1+exp(-p_l))
cov_sm <- params[4]
cov_sl <- params[5]
cov_ml <- params[6]
Prob = numeric(); pars = c(p_s, p_m, p_l, cov_sm, cov_sl, cov_ml)
for (ii in 1:nrow(mat)){
results = mat[ii,]
Prob = c(Prob, prob.by.comb(results, pars) ) # tester ici si sum Prob == 1
}
-sum(freq * log(Prob))
}
parnames(segmentsNLL) <- c("p_s", "p_m", "p_l", "cov_sm", "cov_sl", "cov_ml")
prob.by.comb <- function(results, pars){
......@@ -74,48 +97,53 @@ prob.by.comb <- function(results, pars){
cov_ml * (-1)^both_ml
}
plot_fits_with_CI <- function(name, num.draws, scale){
freq <- data_loading(name)
if(scale == "rel"){
obs <- freq/sum(freq)
}else{
obs <- freq
}
freq.df <- data.frame(comb = names(freq), obs = obs, type = "obs")
fits.df <- compute_CI(name, num.draws, scale, freq)
fits.df$type <- "pred"
# to keep order of the combination types (not in alphabetical order)
freq.df$comb <- factor(freq.df$comb, levels = freq.df$comb)
fits.df$comb <- factor(fits.df$comb, levels = freq.df$comb)
g <- ggplot() + geom_col(data = freq.df, aes(x = comb, y = obs, fill = type)) +
geom_violin(data = fits.df, aes(x = comb, y = pred, fill = type), alpha = 0.5) +
labs(x = "", y = "Number of occurences", fill = "") +
ggtitle(name) +
theme_bw() +
theme(axis.text.x = element_text(size = 21),
axis.text.y = element_text(size = 20),
axis.title.y = element_text(size = 22),
legend.text = element_text(size = 22),
plot.title = element_text(size = 23))
return(g)
}
# plot_fits_with_CI <- function(name, num.draws, scale){ # add the 95% interval, probably stop using violin also?
# freq <- data_loading(name)
# if(scale == "rel"){
# obs <- freq/sum(freq)
# }else{
# obs <- freq
# }
# freq.df <- data.frame(comb = names(freq), obs = obs, type = "obs")
# fits.df <- sample_from_CI(name, num.draws, scale, freq)
# fits.df$type <- "pred"
# # to keep order of the combination types (not in alphabetical order)
# freq.df$comb <- factor(freq.df$comb, levels = freq.df$comb)
# fits.df$comb <- factor(fits.df$comb, levels = freq.df$comb)
# g <- ggplot() + geom_col(data = freq.df, aes(x = comb, y = obs, fill = type)) +
# geom_violin(data = fits.df, aes(x = comb, y = pred, fill = type), alpha = 0.5) +
# labs(x = "", y = "Number of occurences", fill = "") +
# ggtitle(name) +
# theme_bw() +
# theme(axis.text.x = element_text(size = 21),
# axis.text.y = element_text(size = 20),
# axis.title.y = element_text(size = 22),
# legend.text = element_text(size = 22),
# plot.title = element_text(size = 23))
# return(g)
# }
compute_CI <- function(name, num.draws, scale, freq){
model <- readRDS(paste("../Output/segment/",str_replace(name," ","_"),"_model.RDS", sep=""))
sample_from_CI <- function(name, num.draws, scale, freq){
model <- readRDS(paste("../Output/segment/",gsub(" ","_",name),"_model.RDS", sep=""))
COEFS <- rmvn(n = num.draws, mu = model@coef, V = model@vcov)
fits.df <- NULL
for(i in seq(1,dim(COEFS)[1])){
coef <- COEFS[i,]
coef.convert <- c(1,coef[2:3]) * coef[1]
coef.convert <- coef[1:3] #c(1,coef[2:3]) * coef[1]
pars.best.fit <- c(1/(1 + exp(-coef.convert)),coef[4:6])
fits.rel <- apply(mat, 1, prob.by.comb, pars = pars.best.fit)
names(fits.rel) <- names(freq)
fits.abs <- sum(freq) * fits.rel
if(scale == "rel"){
pred <- fits.rel
}else{
pred <- fits.abs
}
fits.df <- rbind(fits.df, data.frame(comb = names(freq), pred = pred))
pars.no.cov <- c(1/(1 + exp(-coef.convert)),0,0,0)
fit.no.cov <- apply(mat, 1, prob.by.comb, pars = pars.no.cov)
names(fit.no.cov) <- names(freq)
cov.contrib <- (fits.rel[["SML"]] - fit.no.cov[["SML"]])/fits.rel[["SML"]]
fits.df <- rbind(fits.df, data.frame(comb = names(freq), pred = pred, expl.effic = rep(cov.contrib, length(pred))))
}
return(fits.df)
}
......
......@@ -24,31 +24,44 @@ mat = mat[c(1,2,3,5,4,6,7,8),] #hard coded to match order of SegCounts
#--------- Generate results : uncomment if you don't have the output files already -----
# for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
# print(name)
# model <- mle2(minuslogl = segmentsNLL,
# start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02),
# data = list(mat = mat, freq = data_loading(name)))
# coef <- c(1,model@coef[2:3]) * model@coef[1]
# # print(summary(model))
# coef <- model@coef[1:3] #c(1,model@coef[2:3]) * model@coef[1]
# pars.best.fit = c(1/(1 + exp(-coef)),model@coef[4:6])
# # print(pars.best.fit)
# # Prob = numeric() # test sum of all combination probabilities is equal to 1 : OK
# # for (ii in 1:nrow(mat)){
# # results = mat[ii,]
# # Prob = c(Prob, prob.by.comb(results, pars.best.fit) )
# # }
# # print(sum(Prob))
# # ci <- confint(model)
# # print(ci)
# # print(1/(1+exp(-ci[1:3,])))
# saveRDS(model, file = paste("../Output/segment/",gsub(" ","_",name),"_model.RDS", sep=""))
# saveRDS(pars.best.fit, file = paste("../Output/segment/",gsub(" ","_",name),"_param.RDS", sep=""))
# }
stock.ins <- readRDS("../Output/segment/RVFV_stock_insect_model.RDS")
stock.mam <- readRDS("../Output/segment/RVFV_stock_mammalian_model.RDS")
merge.stock.ll <- logLik(stock.ins) + logLik(stock.mam) #12 param
stock.ins.obs <- data_loading("RVFV stock insect")
stock.mam.obs <- data_loading("RVFV stock mammalian")
sum.stock.obs <- stock.ins.obs + stock.mam.obs
model.sum.stock <- mle2(minuslogl = segmentsNLL,
start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02),
data = list(mat = mat, freq = sum.stock.obs))
stat <- 2*(merge.stock.ll - logLik(model.sum.stock)) # 1150.84
pchisq(stat, df=6, lower.tail=FALSE) # 2.486916e-245
#--------- Justify the use of species-specific parameters ------
# stock.ins <- readRDS("../Output/segment/RVFV_stock_insect_model.RDS")
# stock.mam <- readRDS("../Output/segment/RVFV_stock_mammalian_model.RDS")
# merge.stock.ll <- logLik(stock.ins) + logLik(stock.mam) #12 param
#
# stock.ins.obs <- data_loading("RVFV stock insect")
# stock.mam.obs <- data_loading("RVFV stock mammalian")
# sum.stock.obs <- stock.ins.obs + stock.mam.obs
# model.sum.stock <- mle2(minuslogl = segmentsNLL,
# start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02),
# data = list(mat = mat, freq = sum.stock.obs))
#
# stat <- 2*(merge.stock.ll - logLik(model.sum.stock)) # 1150.84
# pchisq(stat, df=6, lower.tail=FALSE) # 2.486916e-245
#------- Compute correlations using covariance parameters -----
# Not sure this formula (from Menten et al.) si directly transferable to our case
# Not sure this formula (from Menten et al.) is directly transferable to our case
# for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
# print(name)
# model <- readRDS(paste("../Output/segment/",gsub(" ","_",name),"_model.RDS", sep=""))
......@@ -69,34 +82,44 @@ pchisq(stat, df=6, lower.tail=FALSE) # 2.486916e-245
# plot(g)
# }
#---------- Compare model predictions (CI) ---------
# agg.fits <- NULL
# for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
# fit <- compute_CI(name, 1000, "rel", data_loading(name))
# fit$dataset <- name
# agg.fits <- rbind(agg.fits, fit)
# }
# # have a look at forcats package, fct_inorder(), reorder or relevel
# agg.fits$comb <- factor(agg.fits$comb, levels = c("Empty","S","M","L","SM","SL","ML","SML"))
agg.fits <- NULL
for(name in c("RVFV progeny","SBV progeny")){
fit <- sample_from_CI(name, 1000, "rel", data_loading(name))
fit <- fit %>% group_by(comb) %>% summarise(ci_low = quantile(pred, 0.025),
ci_high = quantile(pred, 0.975))
fit$dataset <- name
agg.fits <- rbind(agg.fits, fit)
}
# have a look at forcats package, fct_inorder(), reorder or relevel
agg.fits$comb <- factor(agg.fits$comb, levels = c("Empty","S","M","L","SM","SL","ML","SML"))
# agg.fits <- agg.fits %>% group_by(dataset, comb) %>% summarise(max = max(pred),
# min = min(pred))
#
# ggplot(agg.fits) + geom_linerange(aes(x = comb,
# ymin = min, ymax = max, col = dataset),
# position = position_dodge(.5), size = 5, alpha = 0.7) +
# labs(y = "CI of predicted probability", x = "") +
# theme_bw() +
# guides(col = guide_legend(ncol=2)) +
# theme(axis.text.x = element_text(size = 19),
# axis.text.y = element_text(size = 20),
# axis.title.y = element_text(size = 22),
# legend.title = element_text(size = 19),
# legend.text = element_text(size = 19),
# panel.grid.minor.y = element_blank(),
# legend.position = "bottom")
ggplot(agg.fits) + geom_linerange(aes(x = comb,
ymin = ci_low, ymax = ci_high, col = dataset),
position = position_dodge(.5), size = 5, alpha = 0.7) +
labs(y = "CI of predicted probability", x = "") +
theme_bw() +
guides(col = guide_legend(ncol=2)) +
theme(axis.text.x = element_text(size = 19),
axis.text.y = element_text(size = 20),
axis.title.y = element_text(size = 22),
legend.title = element_text(size = 19),
legend.text = element_text(size = 19),
panel.grid.minor.y = element_blank(),
legend.position = "bottom")
# ------- Quantify and plot impact of covariance ---------
for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
fit <- sample_from_CI(name,1000,"rel",data_loading(name))
print(name)
print(quantile(fit$expl.effic, 0.025))
print(quantile(fit$expl.effic, 0.975))
}
for(name in c("RVFV progeny","SBV progeny","RVFV stock mammalian","RVFV stock insect")){
freq <- data_loading(name)
param.cov <- readRDS(paste("../Output/segment/",gsub(" ","_",name),"_param.RDS", sep=""))
......
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
Output/segment/compare_predictions_progeny_RVFV_SBV.png

29 KiB

Output/segment/compare_predictions_stock_mam_insect.png

26.9 KiB

0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment