diff --git a/Code/Rift_Segments_Main.R b/Code/Rift_Segments_Main.R index 5e2ca06469787ee072e3d4640d3db52db600791c..d78c5e591b383cb2a1718bb51a1c79973ad60bd9 100644 --- a/Code/Rift_Segments_Main.R +++ b/Code/Rift_Segments_Main.R @@ -12,12 +12,12 @@ setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # set to source file getwd() # Load source files ------------------------------------------------------- -source('Rift_Segments_Functions.R') +# source('Rift_Segments_Functions.R') # Data loading and processing --------------------------------------------- SegCounts <- read.csv('../Data/segmentdata/RVF_invitro_segmentcounts.csv') -View(SegCounts) +# View(SegCounts) names(SegCounts) <- Cs(ID, Empty, S, M, L, SM, SL, ML, SML) # Matrix with content of SegCounts columns (binary coded, t1 = S, t2 = M, t3 = L) @@ -37,19 +37,19 @@ freq = colSums(SegCounts[,-1]) # Likelihood functions ------------------------------------------------------- -segmentsNLL1 = function(params,mat,freq){ +segmentsNLL1 = function(params,mat,freq){ p_s <- params[1] p_m <- params[2] p_l <- params[3] - + p_s <- 1/(1+exp(-p_s)) p_m <- 1/(1+exp(-p_m)) p_l <- 1/(1+exp(-p_l)) - + Prob = numeric(); pars = c(p_s, p_m, p_l) for (ii in 1:nrow(mat)){ results = mat[ii,] - Prob = c(Prob, prob.by.comb(results, pars) ) + Prob = c(Prob, prob.by.comb(results, pars) ) } -sum(freq * log(Prob)) } @@ -59,17 +59,126 @@ parnames(segmentsNLL1) <- c("p_s", "p_m", "p_l") prob.by.comb <- function(results, pars){ prod(pars^results * (1-pars)^(1-results)) } - # fit model --------------------------------------------------------------- segmod <- mle2(minuslogl = segmentsNLL1, start = c( p_s = 0.5, p_m = 0.5, p_l = 0.5), - data = list(mat = mat, freq = freq )) + data = list(mat = mat, freq = freq )) pars.best.fit = 1/(1 + exp(-segmod@coef)) +p_seg <- pars.best.fit fits = apply(mat, 1, prob.by.comb, pars = pars.best.fit) fits = sum(freq) * fits h=barplot(fits, ylim = c(0,2900), names.arg = names(freq)) points(h,freq, pch = 17) + + +#################################################### +##### Try adding correlations between segments ##### +##### 1st try: assuming probability for each ####### +##### segments are known and we only estimate ###### +##### the correlations ############################# +#################################################### + +# Likelihood functions ------------------------------------------------------- +segmentsNLL2 = function(params, p_seg, mat,freq){ + cov_sm <- params[1] + cov_sl <- params[2] + cov_ml <- params[3] + + Prob = numeric(); pars = c(cov_sm, cov_sl, cov_ml) + for (ii in 1:nrow(mat)){ + results = mat[ii,] + Prob = c(Prob, prob.by.comb.corr(results, pars, p_seg) ) + } + -sum(freq * log(Prob)) +} +parnames(segmentsNLL2) <- c("cov_sm", "cov_sl", "cov_ml") + +prob.by.comb.corr <- function(results, pars, p_seg){ + both_sm <- results[1] - results[2] + both_sl <- results[1] - results[3] + both_ml <- results[2] - results[3] + + cov_sm <- pars[1] + cov_sl <- pars[2] + cov_ml <- pars[3] + + prod(p_seg^results * (1-p_seg)^(1-results)) + + cov_sm * (-1)^both_sm + + cov_sl * (-1)^both_sl + + cov_ml * (-1)^both_ml +} + +# fit model --------------------------------------------------------------- +# Some starting values trigger "Error in optim: la valeur initiale dans 'vmin' n'est pas finie" +# There are also some warnings "In log(Prob) : production de NaN" +segmod.corr <- mle2(minuslogl = segmentsNLL2, start = c( cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02), + data = list(p_seg = p_seg, mat = mat, freq = freq )) +cov_pars <- segmod.corr@coef +fits = apply(mat, 1, prob.by.comb.corr, pars = cov_pars, p_seg = p_seg) +fits = sum(freq) * fits + +h=barplot(fits, ylim = c(0,2900), names.arg = names(freq)) +points(h,freq, pch = 17) + +#################################################### +##### 2nd try: estimating both probabilities of #### +##### segments and their correlations ############## +#################################################### + +# Likelihood functions ------------------------------------------------------- +segmentsNLL3 = function(params, p_seg, mat,freq){ + cov_sm <- params[1] + cov_sl <- params[2] + cov_ml <- params[3] + + p_s <- params[4] + p_m <- params[5] + p_l <- params[6] + + p_s <- 1/(1+exp(-p_s)) + p_m <- 1/(1+exp(-p_m)) + p_l <- 1/(1+exp(-p_l)) + + Prob = numeric(); pars = c(cov_sm, cov_sl, cov_ml, p_s, p_m, p_l) + for (ii in 1:nrow(mat)){ + results = mat[ii,] + Prob = c(Prob, prob.by.comb.corr.all(results, pars) ) + } + -sum(freq * log(Prob)) +} +parnames(segmentsNLL3) <- c("cov_sm", "cov_sl", "cov_ml", "p_s", "p_m", "p_l") + +prob.by.comb.corr.all <- function(results, pars){ + both_sm <- results[1] - results[2] + both_sl <- results[1] - results[3] + both_ml <- results[2] - results[3] + + cov_sm <- pars[1] + cov_sl <- pars[2] + cov_ml <- pars[3] + + p_seg <- pars[4:6] + + prod(p_seg^results * (1-p_seg)^(1-results)) + + cov_sm * (-1)^both_sm + + cov_sl * (-1)^both_sl + + cov_ml * (-1)^both_ml +} + +# fit model --------------------------------------------------------------- +# Some starting values trigger "Error in optim: la valeur initiale dans 'vmin' n'est pas finie" +# There are also some warnings "In log(Prob) : production de NaN" +segmod.corr <- mle2(minuslogl = segmentsNLL3, + start = c( cov_sm = 0.02, cov_sl = 0.02, cov_ml = 0.02, p_s = 0.21, p_m = 0.17, p_l = 0.25), + data = list(mat = mat, freq = freq )) +cov_pars <- segmod.corr@coef[c("cov_sm", "cov_sl", "cov_ml")] +p_seg <- 1/(1 + exp(-segmod.corr@coef[c("p_s","p_m","p_l")])) +fits = apply(mat, 1, prob.by.comb.corr.all, pars = c(cov_pars, p_seg)) +fits = sum(freq) * fits + +h=barplot(fits, ylim = c(0,2900), names.arg = names(freq)) +points(h,freq, pch = 17) diff --git a/Output/segment_fit_with_correlation_cov.png b/Output/segment_fit_with_correlation_cov.png new file mode 100644 index 0000000000000000000000000000000000000000..9fadbbb6937cfdf1e7410920ffa72fa29234d75f Binary files /dev/null and b/Output/segment_fit_with_correlation_cov.png differ diff --git a/Output/segment_fit_with_correlation_cov_pseg.png b/Output/segment_fit_with_correlation_cov_pseg.png new file mode 100644 index 0000000000000000000000000000000000000000..d7b2335b0c0aa7cfbff72e0479822f5d9e82a50f Binary files /dev/null and b/Output/segment_fit_with_correlation_cov_pseg.png differ