Skip to content
Snippets Groups Projects
Commit 85814a72 authored by Cecilia, Helene's avatar Cecilia, Helene
Browse files

Segment script modified to automatize the possibility to fix/estimate given...

Segment script modified to automatize the possibility to fix/estimate given parameters and perform likelihood ratio tests.
In the negative log-likelihood function, p_m and p_l are therefore defined as multiplicative coefficient to be applied to p_s, so that keeping p constant for the three segments is equivalent to having p_m and p_l = 1
parent ec6391af
No related branches found
No related tags found
No related merge requests found
......@@ -6,6 +6,7 @@ library(emdbook)
library(bbmle)
library(Hmisc)
library(prodlim)
library(latex2exp)
# Set Work Directory ------------------------------------------------------
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # set to source file location (if working in RStudio)
......@@ -17,10 +18,10 @@ getwd()
# Data loading and processing ---------------------------------------------
# SegCounts <- read.csv('../Data/segmentdata/RVF_invitro_segmentcounts.csv')
SegCounts <- read.csv('../Data/segmentdata/SBV_progeny_mammalians.csv')
# SegCounts <- read.csv('../Data/segmentdata/SBV_progeny_mammalians.csv')
# View(SegCounts)
names(SegCounts) <- Cs(ID, Empty, S, M, L, SM, SL, ML, SML)
# names(SegCounts) <- Cs(ID, Empty, S, M, L, SM, SL, ML, SML)
# Matrix with content of SegCounts columns (binary coded, t1 = S, t2 = M, t3 = L)
mat = as.matrix(expand.grid(t1 = c(0,1), t2 = c(0,1), t3 = c(0,1)))
......@@ -35,25 +36,45 @@ mat = mat[c(1,2,3,5,4,6,7,8),] #hard coded to match order of SegCounts
#
# freq = c(SegCounts[,-1])
freq = colSums(SegCounts[,-1])
# freq = colSums(SegCounts[,-1])
# freq <- c(Empty = 1453, S = 391, M = 247, L = 140,
# SM = 270, SL = 159, ML = 120, SML = 201) # stock vero E6
# freq <- c(Empty = 934, S = 59, M = 359, L = 218,
# SM = 119, SL = 69, ML = 579, SML = 703) # stock C6/36
freq <- c(Empty = 934, S = 59, M = 359, L = 218,
SM = 119, SL = 69, ML = 579, SML = 703) # stock C6/36
# Likelihood functions -------------------------------------------------------
segmentsNLL = function(params, mat, freq, hypothesis){
# segmentsNLL = function(params, mat, freq, hypothesis){
# p_s <- params[1]
# p_m <- ifelse(hypothesis == "one p", p_s, params[2])
# p_l <- ifelse(hypothesis == "one p", p_s, params[3])
#
# p_s <- 1/(1+exp(-p_s))
# p_m <- 1/(1+exp(-p_m))
# p_l <- 1/(1+exp(-p_l))
#
# cov_sm <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[4]))
# cov_sl <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[5]))
# cov_ml <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[6]))
#
# Prob = numeric(); pars = c(p_s, p_m, p_l, cov_sm, cov_sl, cov_ml)
# for (ii in 1:nrow(mat)){
# results = mat[ii,]
# Prob = c(Prob, prob.by.comb(results, pars) )
# }
# -sum(freq * log(Prob))
# }
segmentsNLL = function(params, mat, freq){
p_s <- params[1]
p_m <- ifelse(hypothesis == "one p", p_s, params[2])
p_l <- ifelse(hypothesis == "one p", p_s, params[3])
p_m <- params[2] * p_s
p_l <- params[3] * p_s
p_s <- 1/(1+exp(-p_s))
p_m <- 1/(1+exp(-p_m))
p_l <- 1/(1+exp(-p_l))
cov_sm <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[4]))
cov_sl <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[5]))
cov_ml <- ifelse(hypothesis == "one p", 0, ifelse(hypothesis == "three p", 0, params[6]))
cov_sm <- params[4]
cov_sl <- params[5]
cov_ml <- params[6]
Prob = numeric(); pars = c(p_s, p_m, p_l, cov_sm, cov_sl, cov_ml)
for (ii in 1:nrow(mat)){
......@@ -83,17 +104,64 @@ prob.by.comb <- function(results, pars){
# This is quite efficient for the writing but poses a problem for automatic likelihood ratio tests
# as all the models are considered to have the same number of parameters (df=6)
# segmod.one.p <- mle2(minuslogl = segmentsNLL,
# start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0., cov_sl = 0., cov_ml = 0.),
# data = list(mat = mat, freq = freq, hypothesis = "one p"))
#
# segmod.three.p <- mle2(minuslogl = segmentsNLL,
# start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0., cov_sl = 0., cov_ml = 0.),
# data = list(mat = mat, freq = freq, hypothesis = "three p"))
#
# segmod.corr <- mle2(minuslogl = segmentsNLL,
# start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02),
# data = list(mat = mat, freq = freq, hypothesis = "corr"))
segmod.one.p <- mle2(minuslogl = segmentsNLL,
start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0., cov_sl = 0., cov_ml = 0.),
data = list(mat = mat, freq = freq, hypothesis = "one p"))
start = c(p_s = 0.5),
fixed = c(p_m = 1, p_l = 1, cov_sm = 0., cov_sl = 0., cov_ml = 0.),
data = list(mat = mat, freq = freq))
pars.best.fit = c(rep(1/(1 + exp(-segmod.one.p@coef)),3),0,0,0)
# mod.title <- TeX("RVFV progeny virions ; p_S = p_M = p_L ; no correlation")
# mod.title <- TeX("RVFV stock, Vero E6 ; p_S = p_M = p_L ; no correlation")
mod.title <- TeX("RVFV stock, C6 36 ; p_S = p_M = p_L ; no correlation")
segmod.one.p.corr <- mle2(minuslogl = segmentsNLL,
start = c(p_s = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02),
fixed = c(p_m = 1, p_l = 1),
data = list(mat = mat, freq = freq))
pars.best.fit = c(rep(1/(1 + exp(-segmod.one.p.corr@coef[1])),3),segmod.one.p.corr@coef[2:4])
# mod.title <- TeX("RVFV progeny virions ; p_S = p_M = p_L ; correlation")
# mod.title <- TeX("RVFV stock, Vero E6 ; p_S = p_M = p_L ; correlation")
mod.title <- TeX("RVFV stock, C6 36 ; p_S = p_M = p_L ; correlation")
segmod.three.p <- mle2(minuslogl = segmentsNLL,
start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0., cov_sl = 0., cov_ml = 0.),
data = list(mat = mat, freq = freq, hypothesis = "three p"))
segmod.corr <- mle2(minuslogl = segmentsNLL,
start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02),
data = list(mat = mat, freq = freq, hypothesis = "corr"))
start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5),
fixed = c(cov_sm = 0., cov_sl = 0., cov_ml = 0.),
data = list(mat = mat, freq = freq))
coef <- c(1,segmod.three.p@coef[2:3]) * segmod.three.p@coef[1]
pars.best.fit = c(1/(1 + exp(-coef)),rep(0,3))
# mod.title <- TeX("RVFV progeny virions ; p_S $\\neq$ p_M $\\neq$ p_L ; no correlation")
# mod.title <- TeX("RVFV stock, Vero E6 ; p_S $\\neq$ p_M $\\neq$ p_L ; no correlation")
mod.title <- TeX("RVFV stock, C6 36 ; p_S $\\neq$ p_M $\\neq$ p_L ; no correlation")
segmod.three.p.corr <- mle2(minuslogl = segmentsNLL,
start = c(p_s = 0.5, p_m = 0.5, p_l = 0.5, cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02),
data = list(mat = mat, freq = freq))
coef <- c(1,segmod.three.p.corr@coef[2:3]) * segmod.three.p.corr@coef[1]
pars.best.fit = c(1/(1 + exp(-coef)),segmod.three.p.corr@coef[4:6])
# mod.title <- TeX("RVFV progeny virions ; p_S $\\neq$ p_M $\\neq$ p_L ; correlation")
# mod.title <- TeX("RVFV stock, Vero E6 ; p_S $\\neq$ p_M $\\neq$ p_L ; correlation")
mod.title <- TeX("RVFV stock, C6 36 ; p_S $\\neq$ p_M $\\neq$ p_L ; correlation")
fits = apply(mat, 1, prob.by.comb, pars = pars.best.fit)
fits = sum(freq) * fits
h=barplot(fits, ylim = c(0,2900), names.arg = names(freq), main = mod.title)
points(h,freq, pch = 17, col = "red")
# formatting for ggplot
# model.one.p.rvf.progeny <- data.frame(type = names(freq), obs = freq, pred = fits)
anova(segmod.one.p, segmod.three.p, segmod.one.p.corr, segmod.three.p.corr)
# So instead of using anova function you should compute the ratio by hand, and compare to a chi-squared table
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment