From 85814a72c3e4cbae407bc9aa3b650a006c2300f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?H=C3=A9l=C3=A8ne=20Cecilia?=
 <helene.cecilia@oniris-nantes.fr>
Date: Tue, 26 May 2020 17:53:21 +0200
Subject: [PATCH] Segment script modified to automatize the possibility to
 fix/estimate given parameters and perform likelihood ratio tests. In the
 negative log-likelihood function, p_m and p_l are therefore defined as
 multiplicative coefficient to be applied to p_s, so that keeping p constant
 for the three segments is equivalent to having p_m and p_l = 1

---
 Code/Rift_Segments_Main.R | 106 +++++++++++++++++++++++++++++++-------
 1 file changed, 87 insertions(+), 19 deletions(-)

diff --git a/Code/Rift_Segments_Main.R b/Code/Rift_Segments_Main.R
index 9d72cac..0c98c1f 100644
--- a/Code/Rift_Segments_Main.R
+++ b/Code/Rift_Segments_Main.R
@@ -6,6 +6,7 @@ library(emdbook)
 library(bbmle)
 library(Hmisc)
 library(prodlim)
+library(latex2exp)
 
 # Set Work Directory ------------------------------------------------------
 setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # set to source file location (if working in RStudio)
@@ -17,10 +18,10 @@ getwd()
 
 # Data loading and processing ---------------------------------------------
 # SegCounts <- read.csv('../Data/segmentdata/RVF_invitro_segmentcounts.csv')
-SegCounts <- read.csv('../Data/segmentdata/SBV_progeny_mammalians.csv')
+# SegCounts <- read.csv('../Data/segmentdata/SBV_progeny_mammalians.csv')
 
 # View(SegCounts)
-names(SegCounts) <- Cs(ID, Empty, S, M, L, SM, SL, ML, SML)
+# names(SegCounts) <- Cs(ID, Empty, S, M, L, SM, SL, ML, SML)
 
 # Matrix with content of SegCounts columns (binary coded, t1 = S, t2 = M, t3 = L)
 mat = as.matrix(expand.grid(t1 = c(0,1), t2 = c(0,1), t3 = c(0,1)))
@@ -35,25 +36,45 @@ mat = mat[c(1,2,3,5,4,6,7,8),] #hard coded to match order of SegCounts
 # 
 # freq = c(SegCounts[,-1])
 
-freq = colSums(SegCounts[,-1])
+# freq = colSums(SegCounts[,-1])
 # freq <- c(Empty = 1453, S = 391, M = 247, L = 140,
 #           SM = 270, SL = 159, ML = 120, SML = 201) # stock vero E6
-# freq <- c(Empty = 934, S = 59, M = 359, L = 218,
-#           SM = 119, SL = 69, ML = 579, SML = 703) # stock C6/36
+freq <- c(Empty = 934, S = 59, M = 359, L = 218,
+          SM = 119, SL = 69, ML = 579, SML = 703) # stock C6/36
 
 # Likelihood functions  -------------------------------------------------------
-segmentsNLL = function(params, mat, freq, hypothesis){ 
+# segmentsNLL = function(params, mat, freq, hypothesis){ 
+#   p_s <- params[1]
+#   p_m <- ifelse(hypothesis == "one p", p_s, params[2])
+#   p_l <- ifelse(hypothesis == "one p", p_s, params[3])
+#   
+#   p_s <- 1/(1+exp(-p_s))
+#   p_m <- 1/(1+exp(-p_m))
+#   p_l <- 1/(1+exp(-p_l))
+#   
+#   cov_sm <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[4]))
+#   cov_sl <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[5]))
+#   cov_ml <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[6]))
+#   
+#   Prob = numeric(); pars = c(p_s, p_m, p_l, cov_sm, cov_sl, cov_ml)
+#   for (ii in 1:nrow(mat)){
+#     results = mat[ii,]
+#     Prob = c(Prob, prob.by.comb(results, pars) ) 
+#   }
+#   -sum(freq * log(Prob))
+# }
+segmentsNLL = function(params, mat, freq){ 
   p_s <- params[1]
-  p_m <- ifelse(hypothesis == "one p", p_s, params[2])
-  p_l <- ifelse(hypothesis == "one p", p_s, params[3])
+  p_m <- params[2] * p_s
+  p_l <- params[3] * p_s
   
   p_s <- 1/(1+exp(-p_s))
   p_m <- 1/(1+exp(-p_m))
   p_l <- 1/(1+exp(-p_l))
   
-  cov_sm <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[4]))
-  cov_sl <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[5]))
-  cov_ml <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[6]))
+  cov_sm <- params[4]
+  cov_sl <- params[5]
+  cov_ml <- params[6]
   
   Prob = numeric(); pars = c(p_s, p_m, p_l, cov_sm, cov_sl, cov_ml)
   for (ii in 1:nrow(mat)){
@@ -83,17 +104,64 @@ prob.by.comb <- function(results, pars){
 
 # This is quite efficient for the writing but poses a problem for automatic likelihood ratio tests
 # as all the models are considered to have the same number of parameters (df=6)
+# segmod.one.p <- mle2(minuslogl = segmentsNLL,
+#                     start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0., cov_sl = 0., cov_ml = 0.),
+#                     data = list(mat = mat, freq = freq, hypothesis = "one p"))
+# 
+# segmod.three.p <- mle2(minuslogl = segmentsNLL,
+#                      start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0., cov_sl = 0., cov_ml = 0.),
+#                      data = list(mat = mat, freq = freq, hypothesis = "three p"))
+# 
+# segmod.corr <- mle2(minuslogl = segmentsNLL,
+#                      start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02),
+#                      data = list(mat = mat, freq = freq, hypothesis = "corr"))
+
 segmod.one.p <- mle2(minuslogl = segmentsNLL,
-                    start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0., cov_sl = 0., cov_ml = 0.),
-                    data = list(mat = mat, freq = freq, hypothesis = "one p"))
+                     start = c(p_s = 0.5),
+                     fixed = c(p_m = 1, p_l = 1, cov_sm = 0., cov_sl = 0., cov_ml = 0.),
+                     data = list(mat = mat, freq = freq))
+pars.best.fit = c(rep(1/(1 + exp(-segmod.one.p@coef)),3),0,0,0) 
+# mod.title <- TeX("RVFV progeny virions ; p_S = p_M = p_L ; no correlation")
+# mod.title <- TeX("RVFV stock, Vero E6 ; p_S = p_M = p_L ; no correlation")
+mod.title <- TeX("RVFV stock, C6 36 ; p_S = p_M = p_L ; no correlation")
+
+segmod.one.p.corr <- mle2(minuslogl = segmentsNLL,
+                     start = c(p_s = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02),
+                     fixed = c(p_m = 1, p_l = 1),
+                     data = list(mat = mat, freq = freq))
+pars.best.fit = c(rep(1/(1 + exp(-segmod.one.p.corr@coef[1])),3),segmod.one.p.corr@coef[2:4]) 
+# mod.title <- TeX("RVFV progeny virions ; p_S = p_M = p_L ; correlation")
+# mod.title <- TeX("RVFV stock, Vero E6 ; p_S = p_M = p_L ; correlation")
+mod.title <- TeX("RVFV stock, C6 36 ; p_S = p_M = p_L ; correlation")
 
 segmod.three.p <- mle2(minuslogl = segmentsNLL,
-                     start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0., cov_sl = 0., cov_ml = 0.),
-                     data = list(mat = mat, freq = freq, hypothesis = "three p"))
-
-segmod.corr <- mle2(minuslogl = segmentsNLL,
-                     start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02),
-                     data = list(mat = mat, freq = freq, hypothesis = "corr"))
+                          start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5),
+                          fixed = c(cov_sm = 0., cov_sl = 0., cov_ml = 0.),
+                          data = list(mat = mat, freq = freq))
+coef <- c(1,segmod.three.p@coef[2:3]) * segmod.three.p@coef[1]
+pars.best.fit = c(1/(1 + exp(-coef)),rep(0,3)) 
+# mod.title <- TeX("RVFV progeny virions ; p_S $\\neq$ p_M $\\neq$ p_L ; no correlation")
+# mod.title <- TeX("RVFV stock, Vero E6 ; p_S $\\neq$ p_M $\\neq$ p_L ; no correlation")
+mod.title <- TeX("RVFV stock, C6 36 ; p_S $\\neq$ p_M $\\neq$ p_L ; no correlation")
+
+segmod.three.p.corr <- mle2(minuslogl = segmentsNLL,
+                       start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02),
+                       data = list(mat = mat, freq = freq))
+coef <- c(1,segmod.three.p.corr@coef[2:3]) * segmod.three.p.corr@coef[1]
+pars.best.fit = c(1/(1 + exp(-coef)),segmod.three.p.corr@coef[4:6]) 
+# mod.title <- TeX("RVFV progeny virions ; p_S $\\neq$ p_M $\\neq$ p_L ; correlation")
+# mod.title <- TeX("RVFV stock, Vero E6 ; p_S $\\neq$ p_M $\\neq$ p_L ; correlation")
+mod.title <- TeX("RVFV stock, C6 36 ; p_S $\\neq$ p_M $\\neq$ p_L ; correlation")
+
+fits = apply(mat, 1, prob.by.comb, pars = pars.best.fit)
+fits = sum(freq) * fits
+h=barplot(fits, ylim = c(0,2900), names.arg = names(freq), main = mod.title)
+points(h,freq, pch = 17, col = "red")
+
+# formatting for ggplot
+# model.one.p.rvf.progeny <- data.frame(type = names(freq), obs = freq, pred = fits)
+
+anova(segmod.one.p, segmod.three.p, segmod.one.p.corr, segmod.three.p.corr)
 
 # So instead of using anova function you should compute the ratio by hand, and compare to a chi-squared table
 
-- 
GitLab