From 85814a72c3e4cbae407bc9aa3b650a006c2300f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A9l=C3=A8ne=20Cecilia?= <helene.cecilia@oniris-nantes.fr> Date: Tue, 26 May 2020 17:53:21 +0200 Subject: [PATCH] Segment script modified to automatize the possibility to fix/estimate given parameters and perform likelihood ratio tests. In the negative log-likelihood function, p_m and p_l are therefore defined as multiplicative coefficient to be applied to p_s, so that keeping p constant for the three segments is equivalent to having p_m and p_l = 1 --- Code/Rift_Segments_Main.R | 106 +++++++++++++++++++++++++++++++------- 1 file changed, 87 insertions(+), 19 deletions(-) diff --git a/Code/Rift_Segments_Main.R b/Code/Rift_Segments_Main.R index 9d72cac..0c98c1f 100644 --- a/Code/Rift_Segments_Main.R +++ b/Code/Rift_Segments_Main.R @@ -6,6 +6,7 @@ library(emdbook) library(bbmle) library(Hmisc) library(prodlim) +library(latex2exp) # Set Work Directory ------------------------------------------------------ setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # set to source file location (if working in RStudio) @@ -17,10 +18,10 @@ getwd() # Data loading and processing --------------------------------------------- # SegCounts <- read.csv('../Data/segmentdata/RVF_invitro_segmentcounts.csv') -SegCounts <- read.csv('../Data/segmentdata/SBV_progeny_mammalians.csv') +# SegCounts <- read.csv('../Data/segmentdata/SBV_progeny_mammalians.csv') # View(SegCounts) -names(SegCounts) <- Cs(ID, Empty, S, M, L, SM, SL, ML, SML) +# names(SegCounts) <- Cs(ID, Empty, S, M, L, SM, SL, ML, SML) # Matrix with content of SegCounts columns (binary coded, t1 = S, t2 = M, t3 = L) mat = as.matrix(expand.grid(t1 = c(0,1), t2 = c(0,1), t3 = c(0,1))) @@ -35,25 +36,45 @@ mat = mat[c(1,2,3,5,4,6,7,8),] #hard coded to match order of SegCounts # # freq = c(SegCounts[,-1]) -freq = colSums(SegCounts[,-1]) +# freq = colSums(SegCounts[,-1]) # freq <- c(Empty = 1453, S = 391, M = 247, L = 140, # SM = 270, SL = 159, ML = 120, SML = 201) # stock vero E6 -# freq <- c(Empty = 934, S = 59, M = 359, L = 218, -# SM = 119, SL = 69, ML = 579, SML = 703) # stock C6/36 +freq <- c(Empty = 934, S = 59, M = 359, L = 218, + SM = 119, SL = 69, ML = 579, SML = 703) # stock C6/36 # Likelihood functions ------------------------------------------------------- -segmentsNLL = function(params, mat, freq, hypothesis){ +# segmentsNLL = function(params, mat, freq, hypothesis){ +# p_s <- params[1] +# p_m <- ifelse(hypothesis == "one p", p_s, params[2]) +# p_l <- ifelse(hypothesis == "one p", p_s, params[3]) +# +# p_s <- 1/(1+exp(-p_s)) +# p_m <- 1/(1+exp(-p_m)) +# p_l <- 1/(1+exp(-p_l)) +# +# cov_sm <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[4])) +# cov_sl <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[5])) +# cov_ml <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[6])) +# +# Prob = numeric(); pars = c(p_s, p_m, p_l, cov_sm, cov_sl, cov_ml) +# for (ii in 1:nrow(mat)){ +# results = mat[ii,] +# Prob = c(Prob, prob.by.comb(results, pars) ) +# } +# -sum(freq * log(Prob)) +# } +segmentsNLL = function(params, mat, freq){ p_s <- params[1] - p_m <- ifelse(hypothesis == "one p", p_s, params[2]) - p_l <- ifelse(hypothesis == "one p", p_s, params[3]) + p_m <- params[2] * p_s + p_l <- params[3] * p_s p_s <- 1/(1+exp(-p_s)) p_m <- 1/(1+exp(-p_m)) p_l <- 1/(1+exp(-p_l)) - cov_sm <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[4])) - cov_sl <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[5])) - cov_ml <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[6])) + cov_sm <- params[4] + cov_sl <- params[5] + cov_ml <- params[6] Prob = numeric(); pars = c(p_s, p_m, p_l, cov_sm, cov_sl, cov_ml) for (ii in 1:nrow(mat)){ @@ -83,17 +104,64 @@ prob.by.comb <- function(results, pars){ # This is quite efficient for the writing but poses a problem for automatic likelihood ratio tests # as all the models are considered to have the same number of parameters (df=6) +# segmod.one.p <- mle2(minuslogl = segmentsNLL, +# start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0., cov_sl = 0., cov_ml = 0.), +# data = list(mat = mat, freq = freq, hypothesis = "one p")) +# +# segmod.three.p <- mle2(minuslogl = segmentsNLL, +# start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0., cov_sl = 0., cov_ml = 0.), +# data = list(mat = mat, freq = freq, hypothesis = "three p")) +# +# segmod.corr <- mle2(minuslogl = segmentsNLL, +# start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02), +# data = list(mat = mat, freq = freq, hypothesis = "corr")) + segmod.one.p <- mle2(minuslogl = segmentsNLL, - start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0., cov_sl = 0., cov_ml = 0.), - data = list(mat = mat, freq = freq, hypothesis = "one p")) + start = c(p_s = 0.5), + fixed = c(p_m = 1, p_l = 1, cov_sm = 0., cov_sl = 0., cov_ml = 0.), + data = list(mat = mat, freq = freq)) +pars.best.fit = c(rep(1/(1 + exp(-segmod.one.p@coef)),3),0,0,0) +# mod.title <- TeX("RVFV progeny virions ; p_S = p_M = p_L ; no correlation") +# mod.title <- TeX("RVFV stock, Vero E6 ; p_S = p_M = p_L ; no correlation") +mod.title <- TeX("RVFV stock, C6 36 ; p_S = p_M = p_L ; no correlation") + +segmod.one.p.corr <- mle2(minuslogl = segmentsNLL, + start = c(p_s = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02), + fixed = c(p_m = 1, p_l = 1), + data = list(mat = mat, freq = freq)) +pars.best.fit = c(rep(1/(1 + exp(-segmod.one.p.corr@coef[1])),3),segmod.one.p.corr@coef[2:4]) +# mod.title <- TeX("RVFV progeny virions ; p_S = p_M = p_L ; correlation") +# mod.title <- TeX("RVFV stock, Vero E6 ; p_S = p_M = p_L ; correlation") +mod.title <- TeX("RVFV stock, C6 36 ; p_S = p_M = p_L ; correlation") segmod.three.p <- mle2(minuslogl = segmentsNLL, - start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0., cov_sl = 0., cov_ml = 0.), - data = list(mat = mat, freq = freq, hypothesis = "three p")) - -segmod.corr <- mle2(minuslogl = segmentsNLL, - start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02), - data = list(mat = mat, freq = freq, hypothesis = "corr")) + start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5), + fixed = c(cov_sm = 0., cov_sl = 0., cov_ml = 0.), + data = list(mat = mat, freq = freq)) +coef <- c(1,segmod.three.p@coef[2:3]) * segmod.three.p@coef[1] +pars.best.fit = c(1/(1 + exp(-coef)),rep(0,3)) +# mod.title <- TeX("RVFV progeny virions ; p_S $\\neq$ p_M $\\neq$ p_L ; no correlation") +# mod.title <- TeX("RVFV stock, Vero E6 ; p_S $\\neq$ p_M $\\neq$ p_L ; no correlation") +mod.title <- TeX("RVFV stock, C6 36 ; p_S $\\neq$ p_M $\\neq$ p_L ; no correlation") + +segmod.three.p.corr <- mle2(minuslogl = segmentsNLL, + start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02), + data = list(mat = mat, freq = freq)) +coef <- c(1,segmod.three.p.corr@coef[2:3]) * segmod.three.p.corr@coef[1] +pars.best.fit = c(1/(1 + exp(-coef)),segmod.three.p.corr@coef[4:6]) +# mod.title <- TeX("RVFV progeny virions ; p_S $\\neq$ p_M $\\neq$ p_L ; correlation") +# mod.title <- TeX("RVFV stock, Vero E6 ; p_S $\\neq$ p_M $\\neq$ p_L ; correlation") +mod.title <- TeX("RVFV stock, C6 36 ; p_S $\\neq$ p_M $\\neq$ p_L ; correlation") + +fits = apply(mat, 1, prob.by.comb, pars = pars.best.fit) +fits = sum(freq) * fits +h=barplot(fits, ylim = c(0,2900), names.arg = names(freq), main = mod.title) +points(h,freq, pch = 17, col = "red") + +# formatting for ggplot +# model.one.p.rvf.progeny <- data.frame(type = names(freq), obs = freq, pred = fits) + +anova(segmod.one.p, segmod.three.p, segmod.one.p.corr, segmod.three.p.corr) # So instead of using anova function you should compute the ratio by hand, and compare to a chi-squared table -- GitLab