Skip to content
Snippets Groups Projects
Commit f9f8f7c2 authored by Cecilia, Helene's avatar Cecilia, Helene
Browse files

Segments script updated to include correlation between segments

3 parameters, one for each pair of segments
If the two segments of the pair are absent or present, the covariance parameter is added
If one is present and the other absent, the covariance parameter is substracted
Improves the fit for SML particles
parent 7599d2bd
Branches
No related tags found
No related merge requests found
......@@ -12,12 +12,12 @@ setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # set to source file
getwd()
# Load source files -------------------------------------------------------
source('Rift_Segments_Functions.R')
# source('Rift_Segments_Functions.R')
# Data loading and processing ---------------------------------------------
SegCounts <- read.csv('../Data/segmentdata/RVF_invitro_segmentcounts.csv')
View(SegCounts)
# View(SegCounts)
names(SegCounts) <- Cs(ID, Empty, S, M, L, SM, SL, ML, SML)
# Matrix with content of SegCounts columns (binary coded, t1 = S, t2 = M, t3 = L)
......@@ -37,19 +37,19 @@ freq = colSums(SegCounts[,-1])
# Likelihood functions -------------------------------------------------------
segmentsNLL1 = function(params,mat,freq){
segmentsNLL1 = function(params,mat,freq){
p_s <- params[1]
p_m <- params[2]
p_l <- params[3]
p_s <- 1/(1+exp(-p_s))
p_m <- 1/(1+exp(-p_m))
p_l <- 1/(1+exp(-p_l))
Prob = numeric(); pars = c(p_s, p_m, p_l)
for (ii in 1:nrow(mat)){
results = mat[ii,]
Prob = c(Prob, prob.by.comb(results, pars) )
Prob = c(Prob, prob.by.comb(results, pars) )
}
-sum(freq * log(Prob))
}
......@@ -59,17 +59,126 @@ parnames(segmentsNLL1) <- c("p_s", "p_m", "p_l")
prob.by.comb <- function(results, pars){
prod(pars^results * (1-pars)^(1-results))
}
# fit model ---------------------------------------------------------------
segmod <- mle2(minuslogl = segmentsNLL1, start = c( p_s = 0.5, p_m = 0.5, p_l = 0.5),
data = list(mat = mat, freq = freq ))
data = list(mat = mat, freq = freq ))
pars.best.fit = 1/(1 + exp(-segmod@coef))
p_seg <- pars.best.fit
fits = apply(mat, 1, prob.by.comb, pars = pars.best.fit)
fits = sum(freq) * fits
h=barplot(fits, ylim = c(0,2900), names.arg = names(freq))
points(h,freq, pch = 17)
####################################################
##### Try adding correlations between segments #####
##### 1st try: assuming probability for each #######
##### segments are known and we only estimate ######
##### the correlations #############################
####################################################
# Likelihood functions -------------------------------------------------------
segmentsNLL2 = function(params, p_seg, mat,freq){
cov_sm <- params[1]
cov_sl <- params[2]
cov_ml <- params[3]
Prob = numeric(); pars = c(cov_sm, cov_sl, cov_ml)
for (ii in 1:nrow(mat)){
results = mat[ii,]
Prob = c(Prob, prob.by.comb.corr(results, pars, p_seg) )
}
-sum(freq * log(Prob))
}
parnames(segmentsNLL2) <- c("cov_sm", "cov_sl", "cov_ml")
prob.by.comb.corr <- function(results, pars, p_seg){
both_sm <- results[1] - results[2]
both_sl <- results[1] - results[3]
both_ml <- results[2] - results[3]
cov_sm <- pars[1]
cov_sl <- pars[2]
cov_ml <- pars[3]
prod(p_seg^results * (1-p_seg)^(1-results)) +
cov_sm * (-1)^both_sm +
cov_sl * (-1)^both_sl +
cov_ml * (-1)^both_ml
}
# fit model ---------------------------------------------------------------
# Some starting values trigger "Error in optim: la valeur initiale dans 'vmin' n'est pas finie"
# There are also some warnings "In log(Prob) : production de NaN"
segmod.corr <- mle2(minuslogl = segmentsNLL2, start = c( cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02),
data = list(p_seg = p_seg, mat = mat, freq = freq ))
cov_pars <- segmod.corr@coef
fits = apply(mat, 1, prob.by.comb.corr, pars = cov_pars, p_seg = p_seg)
fits = sum(freq) * fits
h=barplot(fits, ylim = c(0,2900), names.arg = names(freq))
points(h,freq, pch = 17)
####################################################
##### 2nd try: estimating both probabilities of ####
##### segments and their correlations ##############
####################################################
# Likelihood functions -------------------------------------------------------
segmentsNLL3 = function(params, p_seg, mat,freq){
cov_sm <- params[1]
cov_sl <- params[2]
cov_ml <- params[3]
p_s <- params[4]
p_m <- params[5]
p_l <- params[6]
p_s <- 1/(1+exp(-p_s))
p_m <- 1/(1+exp(-p_m))
p_l <- 1/(1+exp(-p_l))
Prob = numeric(); pars = c(cov_sm, cov_sl, cov_ml, p_s, p_m, p_l)
for (ii in 1:nrow(mat)){
results = mat[ii,]
Prob = c(Prob, prob.by.comb.corr.all(results, pars) )
}
-sum(freq * log(Prob))
}
parnames(segmentsNLL3) <- c("cov_sm", "cov_sl", "cov_ml", "p_s", "p_m", "p_l")
prob.by.comb.corr.all <- function(results, pars){
both_sm <- results[1] - results[2]
both_sl <- results[1] - results[3]
both_ml <- results[2] - results[3]
cov_sm <- pars[1]
cov_sl <- pars[2]
cov_ml <- pars[3]
p_seg <- pars[4:6]
prod(p_seg^results * (1-p_seg)^(1-results)) +
cov_sm * (-1)^both_sm +
cov_sl * (-1)^both_sl +
cov_ml * (-1)^both_ml
}
# fit model ---------------------------------------------------------------
# Some starting values trigger "Error in optim: la valeur initiale dans 'vmin' n'est pas finie"
# There are also some warnings "In log(Prob) : production de NaN"
segmod.corr <- mle2(minuslogl = segmentsNLL3,
start = c( cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02, p_s = 0.21, p_m = 0.17, p_l = 0.25),
data = list(mat = mat, freq = freq ))
cov_pars <- segmod.corr@coef[c("cov_sm", "cov_sl", "cov_ml")]
p_seg <- 1/(1 + exp(-segmod.corr@coef[c("p_s","p_m","p_l")]))
fits = apply(mat, 1, prob.by.comb.corr.all, pars = c(cov_pars, p_seg))
fits = sum(freq) * fits
h=barplot(fits, ylim = c(0,2900), names.arg = names(freq))
points(h,freq, pch = 17)
Output/segment_fit_with_correlation_cov.png

5.98 KiB

Output/segment_fit_with_correlation_cov_pseg.png

5.84 KiB

0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment