diff --git a/Code/Rift_WithinHost_Functions_Build_BasicModel.R b/Code/Rift_WithinHost_Functions_Build_BasicModel.R index d35dc9c4904248e12dbcaa30b6daf77e32e605c5..f80019aeaba95a53a99d719a93cba852fa4dc19e 100644 --- a/Code/Rift_WithinHost_Functions_Build_BasicModel.R +++ b/Code/Rift_WithinHost_Functions_Build_BasicModel.R @@ -13,6 +13,7 @@ ## Load source files ------------------ ## ------------------------------- + simulate_basic_model <- function (U = 1e+07, I = 0, Vi = 1, Vni = 0, b = 1e-7, dI = 1, dV = 5, p = 0.12, w = 1e+04, tstart = 0, tfinal = 50, dt = 0.1) diff --git a/Code/Rift_WithinHost_Functions_Likelihood.R b/Code/Rift_WithinHost_Functions_Likelihood.R index 336be5e4509bcbd67e73a296f6d1d7aed787f589..a6c4cb9619392c2f0ac689b059ea330387a14680 100644 --- a/Code/Rift_WithinHost_Functions_Likelihood.R +++ b/Code/Rift_WithinHost_Functions_Likelihood.R @@ -82,23 +82,15 @@ model.point.likelihood <- function(data.point, model.point, log=FALSE){ return(ifelse(log, likelihood.sum, exp(likelihood.sum))) } -logLik.trajectory <- function(params, model, init.state, data, point.likelihood.function){ - allpars = c(init.state, params, - tstart = 0, tfinal = max(data$xvals), dt = 0.1) - # print(allpars) - # browser() - odeout <- do.call(model, as.list(allpars)) - modelpred <- odeout$ts[match(data$xvals, odeout$ts[,"time"]), c("Vi","Vni")] - # browser() - # print(params) +logLik.trajectory <- function(params, model.output, init.state, data, point.likelihood.function){ ll = 0 - if(all(is.na(modelpred$Vi)) & all(is.na(modelpred$Vni))){ + if(all(is.na(model.output$Vi)) & all(is.na(model.output$Vni))){ ll <- -1e+10 # print("traj NA") }else{ for (x in 1:nrow(data)) { data.point <- unlist(data[x, ]) - model.point <- unlist(modelpred[x, ]) + model.point <- unlist(model.output[x, ]) ll <- ll + point.likelihood.function(data.point = data.point, model.point = model.point, log = TRUE) } @@ -106,32 +98,5 @@ logLik.trajectory <- function(params, model, init.state, data, point.likelihood. return(ll) } -# logLik.creator <- function(model, point.likelihood.function){ -# logLik.wrapper <- function(params, data, init){ -# return(logLik.trajectory(model = model, -# point.likelihood.function = point.likelihood.function, -# params = params, -# init = init, -# data = data)) -# } -# return(logLik.wrapper) -# } - -# Only meant for simulated data (from known parameter values), as a sanity check -plot.likelihood.profiles <- function(parnames, truepars, model, init, simdata, point.likelihood.function, seqpars){ - for(i in parnames){ - # print(i) - LL <- NULL - for(p in seqpars[[i]]){ - # print(p) - theta <- truepars - theta[[i]] <- p - ll <- logLik.trajectory(params = theta, model = model, init.state = init, data = simdata, point.likelihood.function = point.likelihood.function) - LL <- c(LL, ll) - } - plot(seqpars[[i]], LL, xlab = i, ylab = "logLik", main = i) - abline(v = truepars[[i]]) - } -} diff --git a/Code/Rift_WithinHost_Functions_MCMC.R b/Code/Rift_WithinHost_Functions_MCMC.R index 76e57ffc7e47311d3ea2fb1037bdad2c8d59daa6..6eaa6f4d1d372ca14d5a98f9d5da9b41eb0e7cc2 100644 --- a/Code/Rift_WithinHost_Functions_MCMC.R +++ b/Code/Rift_WithinHost_Functions_MCMC.R @@ -13,31 +13,17 @@ # # # ## Load source files ------------------ -# source('.R') +source('Rift_WithinHost_Functions_Likelihood.R') ## ------------------------------- -prior.creator <- function(parmin, parmax, log){ - prior.wrapper <- function(params, log = log){ - return(compute.priors(params = params, - # parnames = parnames, - parmin = parmin, - parmax = parmax, - log = log)) - } - return(prior.wrapper) -} - # TO DO : test beta distribution (for params between 0 and 1, to be more informative than uniform) -compute.priors <- function(params, parmin, parmax, log = FALSE){ - parnames <- names(params) +compute.prior <- function(params, parnames, parmin, parmax, log = FALSE){ small.step <- 1e-15 min.allowed <- -60000 log.sum <- 0 - # names(params) <- parnames - # names(parmin) <- parnames - # names(parmax) <- parnames - for(i in parnames){ #beware if 1/omega is estimated, do not allow 0 + names(params) <- parnames + for(i in parnames){ tested.value <- params[[i]] min <- parmin[[i]] max <- parmax[[i]] @@ -49,48 +35,34 @@ compute.priors <- function(params, parmin, parmax, log = FALSE){ return(ifelse(log, log.sum, exp(log.sum))) } -compute.posterior <- function(model, model.prior.function, params, parnames, init.state, data, ll.point.function) -{ - names(params) <- parnames - log.prior <- model.prior.function(params = params, log = TRUE) +compute.posterior <- function(model.output, + params, parnames, parmin, parmax, + init.state, data, likelihood.function){ + log.prior <- compute.prior(params = params, parnames = parnames, parmin = parmin, parmax = parmax, log = TRUE) if (is.finite(log.prior)) { - log.likelihood <- logLik.trajectory(model = model, params = params, + log.likelihood <- logLik.trajectory(model.output = model.output, params = params, init.state = init.state, data = data, - point.likelihood.function = ll.point.function) + point.likelihood.function = likelihood.function) } else { log.likelihood <- -Inf } log.posterior <- log.prior + log.likelihood + # check why posterior does not peak at the same place as likelihood alone + # print("prior") + # print(log.prior) + # print("ll") + # print(log.likelihood) + # print("ratio") + # print(log.prior/log.likelihood) return(log.posterior) } -posterior.creator <- function(model, model.prior.function, ll.point.function, init.state, data){ - posterior.wrapper <- function(params, parnames){ - return(compute.posterior(model = model, model.prior.function = model.prior.function, - params = params, parnames = parnames, init.state = init.state, data = data, - ll.point.function = ll.point.function)) - } - return(posterior.wrapper) -} - -plot.posterior.profiles <- function(parnames, truepars, posterior.function, seqpars){ - for(i in parnames){ - # print(i) - LL <- NULL - for(p in seqpars[[i]]){ - # print(p) - theta <- truepars - theta[[i]] <- p - ll <- posterior.function(params = theta, parnames = parnames) - LL <- c(LL, ll) - } - plot(seqpars[[i]], LL, xlab = i, ylab = "Posterior", main = i) - abline(v = truepars[[i]]) - } -} - -my.mcmc <- function(nbIteration, init.param, sdProposal, calculateLL){ +my.mcmc <- function(nbIteration, model.solve.ode, + init.param, sdProposal, + parmin, parmax, #limits for uniform priors + likelihood.function, + init.state, data){ parnames <- names(init.param) nbParam <- length(init.param) @@ -101,7 +73,18 @@ my.mcmc <- function(nbIteration, init.param, sdProposal, calculateLL){ iteration<-1 parameters[iteration,] <- init.param #initial value - logLik[iteration]<-calculateLL(params = parameters[iteration,], parnames = parnames) + + # gather parameters + allpars = c(init.state, parameters[iteration,], + tstart = 0, tfinal = max(data$xvals), dt = 0.1) + # run the model + odeout <- do.call(model.solve.ode, as.list(allpars)) + # extract only the compartments we fit, at the timesteps present in the data + modelpred <- odeout$ts[match(data$xvals, odeout$ts[,"time"]), c("Vi","Vni")] + # compute posterior + logLik[iteration] <- compute.posterior(model.output = modelpred, + params = parameters[iteration,], parnames = parnames, parmin = parmin, parmax = parmax, + init.state = init.state, data = data, likelihood.function = likelihood.function) for(iteration in 2:nbIteration){ # print(iteration) @@ -112,7 +95,15 @@ my.mcmc <- function(nbIteration, init.param, sdProposal, calculateLL){ # browser() newParam<-oldParam*exp(sdProposal[iParam]*rnorm(1)) # -Inf ; Inf : need to be cautious, prior should be enough to avoid issues, as long as you don't have NA returned parameters[iteration,iParam]<-newParam - newLogLik<-calculateLL(parameters[iteration,], parnames = parnames) + + allpars = c(init.state, parameters[iteration,], + tstart = 0, tfinal = max(data$xvals), dt = 0.1) + odeout <- do.call(model.solve.ode, as.list(allpars)) + modelpred <- odeout$ts[match(data$xvals, odeout$ts[,"time"]), c("Vi","Vni")] + + newLogLik<-compute.posterior(model.output = modelpred, + params = parameters[iteration,], parnames = parnames, parmin = parmin, parmax = parmax, + init.state = init.state, data = data, likelihood.function = likelihood.function) # you might want to change the order of parameters to avoid correlation problems # browser() if(is.finite(newParam)){ @@ -130,7 +121,47 @@ my.mcmc <- function(nbIteration, init.param, sdProposal, calculateLL){ return(list(logLik = logLik, accept = accept, parameters = parameters)) } -get.optimal.sdProposal <- function(model.posterior, init.param, finetune.Iterations, burnIn = 1, ft.sdProposals, +# Only meant for simulated data (from known parameter values), as a sanity check +plot.profiles <- function(parnames, truepars, seqpars, + model.solve.ode, init.state, + parmin, parmax, data, + likelihood.function, + type = "likelihood"){ + for(i in parnames){ + # print(i) + LL <- NULL + for(p in seqpars[[i]]){ + # print(p) + theta <- truepars + theta[[i]] <- p + # gather parameters + allpars = c(init.state, theta, + tstart = 0, tfinal = max(data$xvals), dt = 0.1) + # run the model + odeout <- do.call(model.solve.ode, as.list(allpars)) + # extract only the compartments we fit, at the timesteps present in the data + modelpred <- odeout$ts[match(data$xvals, odeout$ts[,"time"]), c("Vi","Vni")] + if(type == "posterior"){ + ll <- compute.posterior(model.output = modelpred, + params = theta, parnames = parnames, parmin = parmin, parmax = parmax, + init.state = init.state, data = data, likelihood.function = likelihood.function) + }else if(type == "likelihood"){ + ll <- logLik.trajectory(params = theta, model.output = modelpred, + init.state = init.state, data = data, + point.likelihood.function = likelihood.function) + } + LL <- c(LL, ll) + } + plot(seqpars[[i]], LL, xlab = i, ylab = type, main = i) + abline(v = truepars[[i]]) + } +} + +get.optimal.sdProposal <- function(init.param, finetune.Iterations, burnIn = 1, ft.sdProposals, + model.solve.ode, + parmin, parmax, #limits for uniform priors + likelihood.function, + init.state, data, verbose = 0){ if (verbose == 1) { browser() } maxReps = 15 # how often are you going to adjust to get to the right acceptance rate? @@ -146,10 +177,13 @@ get.optimal.sdProposal <- function(model.posterior, init.param, finetune.Iterati print(paste("Fine tuning ",counter)) # HERE PUT YOUR MCMC-function with inputs#### tic("mcmcMH") - my_mcmc <- my.mcmc(nbIteration = finetune.Iterations, + my_mcmc <- my.mcmc(sdProposal = ft.sdProposals, + nbIteration = finetune.Iterations, init.param = init.param, - sdProposal = ft.sdProposals, - calculateLL = model.posterior) + model.solve.ode = model.solve.ode, + parmin = parmin, parmax = parmax, #limits for uniform priors + likelihood.function = likelihood.function, + init.state = init.state, data = data) toc(log = TRUE) acc.matrix <- my_mcmc$accept # acc.matrix <- as.data.frame(acc.matrix) diff --git a/Code/Rift_WithinHost_Main_MCMC.R b/Code/Rift_WithinHost_Main_MCMC.R index e4c374ad03e8c2867d6946aaa130f55ba7c3532b..9fd3b5c4ee08d0cb6f9e4880c9746e7fa90dff2e 100644 --- a/Code/Rift_WithinHost_Main_MCMC.R +++ b/Code/Rift_WithinHost_Main_MCMC.R @@ -19,7 +19,6 @@ getwd() ## Load source files ------------------ source('Rift_WithinHost_Functions_Build_BasicModel.R') -source('Rift_WithinHost_Functions_Likelihood.R') source("Rift_WithinHost_Functions_MCMC.R") load("../Output/basic_model_sim_data_params.RData") # run Rift_WithinHost_Main_SimulateData_BasicModel.R if you don't have this file yet load("../Output/global_params.RData") @@ -28,28 +27,11 @@ load("../Output/global_params.RData") sim.data <- read.csv("../Output/basic_model_sim_data_dead_sheep.csv") # run Rift_WithinHost_Main_SimulateData_BasicModel.R if you don't have this file yet ## ------------------------------- -# ------- MAIN --------- -basic.model.prior <- prior.creator(parmin = c(b = 1e-9, dI = 0.1, w = 1000, p = 1e-4, dV = 0.1), - parmax = c(b = 1e-7, dI = 15, w = 50000, p = 2e-1, dV = 15), - log = TRUE) #parnames = c("b","dI", "w", "p", "dV"), +parmin = c(b = 1e-9, dI = 0.1, w = 1000, p = 1e-4, dV = 0.1) +parmax = c(b = 1e-7, dI = 15, w = 50000, p = 2e-1, dV = 15) +parnames = c("b","dI", "w", "p", "dV") -# basic.model.ll <- logLik.creator(model = simulate_basic_model, point.likelihood.function = model.point.likelihood) - -init.state <- Y0 - -#fonction to calculate LL for set of parameters -calculateLL <- posterior.creator(model = simulate_basic_model, model.prior.function = basic.model.prior, - ll.point.function = model.point.likelihood, init.state = init.state, data = sim.data) - -init.param <- c(b = 1e-8, dI = 1, w = 8e3, p = 1e-2, dV = 1) -sdProposal <- c(b = 1e-7, dI = 0.5, w = 2e3, p = 2e-4, dV = 0.5) # dI = dV = 0.5 give good acceptance rates -# you will run nbIteration * nbParam simulations : possibly quite long! -# tic() -# results <- my.mcmc(nbIteration = 1000, -# init.param = init.param, -# sdProposal = sdProposal, -# calculateLL = calculateLL) -# toc(log = TRUE) +init.state <- Y0 # from basic_model_sim_data_params b.seq <- c(seq(5:9)*10**(-9), seq(1:9)*10**(-8), seq(1:9)*10**(-7)) dI.seq <- c(seq(1,9)*10**(-2), seq(1,9)*10**(-1), seq(1,10)) @@ -64,14 +46,41 @@ seqpars[["p"]] <- p.seq seqpars[["w"]] <- w.seq truepars <- modelpars -plot.posterior.profiles(parnames, truepars, calculateLL, seqpars) +# Bot type of profiles peak at the right place, only B and dI seem quite flat in the posterior compared to likelihood alone? + +plot.profiles(parnames = parnames, truepars = truepars, seqpars = seqpars, + model.solve.ode = simulate_basic_model, init.state = init.state, + parmin = parmin, parmax = parmax, data = sim.data, + likelihood.function = model.point.likelihood, + type = "likelihood") + +plot.profiles(parnames = parnames, truepars = truepars, seqpars = seqpars, + model.solve.ode = simulate_basic_model, init.state = init.state, + parmin = parmin, parmax = parmax, data = sim.data, + likelihood.function = model.point.likelihood, + type = "posterior") + +init.param <- c(b = 1e-8, dI = 1, w = 8e3, p = 1e-2, dV = 1) +sdProposal <- c(b = 1e-7, dI = 0.5, w = 2e3, p = 2e-4, dV = 0.5) # dI = dV = 0.5 give good acceptance rates +# you will run nbIteration * nbParam simulations : possibly quite long! +tic() +results <- my.mcmc(nbIteration = 1000, model.solve.ode = simulate_basic_model, + init.param = init.param, sdProposal = sdProposal, + parmin = parmin, parmax = parmax, #limits for uniform priors + likelihood.function = model.point.likelihood, + init.state = init.state, data = sim.data) +toc(log = TRUE) # tic() -# results <- get.optimal.sdProposal(model.posterior = calculateLL, -# init.param = init.param, -# finetune.Iterations = 500, burnIn = 100, -# ft.sdProposals = sdProposal) +# results <- get.optimal.sdProposal(init.param = init.param, finetune.Iterations = 500, burnIn = 100, +# ft.sdProposals = sdProposal, +# model.solve.ode = simulate_basic_model, +# parmin = parmin, parmax = parmax, #limits for uniform priors +# likelihood.function = model.point.likelihood, +# init.state = init.state, data = sim.data) # toc(log = TRUE) + + # test <- results$mcmc # for(i in 1:length(init.param)){ # plot(test$parameters[,i], main = names(init.param[i]), type = "l") diff --git a/Code/mcmc_wrapper_style/Example_MCMC_structure.R b/Code/mcmc_wrapper_style/Example_MCMC_structure.R new file mode 100644 index 0000000000000000000000000000000000000000..606d972207fdcc93431cb53110c1d3b0d37daa43 --- /dev/null +++ b/Code/mcmc_wrapper_style/Example_MCMC_structure.R @@ -0,0 +1,65 @@ +nbParam<-9 +param<-c(.3,rep(0.75,nbParam-1)) +sdProposal<-rep(0.1,nbParam) + + +#########calculate LL & MCMC +# include calculatePosterior and calculatePrior +calculateLL<-function(param) +{LL<-0 + #fonction to calculate LL for set of parameters + return(LL) +} +# create a function for this +nbIteration<-5000 # you really run nbIteration * nbParam simulations : possibly quite long! +burnIn<-100 +# matrices to store everything +logLik<-rep(0,nbIteration) +accept<-matrix(0,ncol=nbParam,nrow=nbIteration) +parameters<-matrix(0,ncol=nbParam,nrow=nbIteration) + +iteration<-1 +parameters[iteration,]<-param #initial value + + +logLik[iteration]<-calculateLL(parameters[iteration,]) +for(iteration in 2:nbIteration) +{parameters[iteration,]<-parameters[iteration-1,] + logLik[iteration]<-logLik[iteration-1] + for(iParam in 1:nbParam) + {oldParam<-parameters[iteration,iParam] + newParam<-oldParam*exp(sdProposal[iParam]*rnorm(1)) # -Inf ; Inf : need to be cautious, prior should be enough to avoid issues, as long as you don't have NA returned + + parameters[iteration,iParam]<-newParam + newLogLik<-calculateLL(parameters[iteration,]) + # you might want to change the order of parameters to avoid correlation problems (?) + if(log(runif(1))<newLogLik-logLik[iteration]+log(newParam)-log(oldParam)) + {logLik[iteration]<-newLogLik + accept[iteration,iParam]<-1 + } + else{parameters[iteration,iParam]<-oldParam + } + } +} + +#calculateDIC +if(nbParam==1) meanParam<-median(parameters[-(1:burnIn)]) else meanParam<-apply(parameters[-(1:burnIn),],2,median) + +deviance.meanParam<--2*calculateLL(meanParam) +meanDeviance<-(-2)*mean(logLik[-(1:burnIn)]) +pD<-meanDeviance-deviance.meanParam +DIC<-pD+meanDeviance + + +########## + + +mat<-matrix(1:6,ncol=2,nrow=3,byrow=TRUE) +layout(mat,widths=lcm(rep(6,2)),heights=lcm(rep(6,3))) +par(mar=c(2,2,2,0)) + +plot(logLik[-(1:burnIn)],main="LL") + +for(parID in 1:nbParam) {plot(parameters[-(1:burnIn),parID],main=parID)} + +apply(parameters[-(1:burnIn),],2,quantile,prob=c(0.025,0.5,0.975)) diff --git a/Code/mcmc_wrapper_style/Rift_WithinHost_Functions_Build_BasicModel.R b/Code/mcmc_wrapper_style/Rift_WithinHost_Functions_Build_BasicModel.R new file mode 100644 index 0000000000000000000000000000000000000000..f80019aeaba95a53a99d719a93cba852fa4dc19e --- /dev/null +++ b/Code/mcmc_wrapper_style/Rift_WithinHost_Functions_Build_BasicModel.R @@ -0,0 +1,41 @@ +## --------------------------- +## +## Script name: Rift_WithinHost_Functions_Build_BasicModel.R +## +## Purpose of script: +## +## Author: Helene Cecilia +## +## Date Created: 2020-09-24 + +## Loading Packages ------------------ + +## Load source files ------------------ + +## ------------------------------- + +simulate_basic_model <- function (U = 1e+07, I = 0, Vi = 1, Vni = 0, + b = 1e-7, dI = 1, dV = 5, p = 0.12, w = 1e+04, + tstart = 0, tfinal = 50, dt = 0.1) +{ + basicvirus_ode_fct <- function(t, y, parms) { + with(as.list(c(y, parms)), { + dU = - b * Vi * U + dI = b * Vi * U - dI * I + dVi = w * p * I - dV * Vi - b * Vi * U + dVni = w * (1-p) * I - dV * Vni + list(c(dU, dI, dVi, dVni)) + }) + } + timevec = seq(tstart, tfinal, by = dt) + vars = c(U = U, I = I, Vi = Vi, Vni = Vni) + pars = c(dI = dI, dV = dV, b = b, p = p, + w = w) + odeout = deSolve::ode(y = vars, parms = pars, times = timevec, + func = basicvirus_ode_fct, atol = 1e-30, rtol = 1e-10, maxsteps = 50000) + result <- list() + trajectory <- as.data.frame(odeout) + trajectory[trajectory<0] <- 0 + result$ts <- trajectory + return(result) +} \ No newline at end of file diff --git a/Code/mcmc_wrapper_style/Rift_WithinHost_Functions_Likelihood.R b/Code/mcmc_wrapper_style/Rift_WithinHost_Functions_Likelihood.R new file mode 100644 index 0000000000000000000000000000000000000000..336be5e4509bcbd67e73a296f6d1d7aed787f589 --- /dev/null +++ b/Code/mcmc_wrapper_style/Rift_WithinHost_Functions_Likelihood.R @@ -0,0 +1,137 @@ +## --------------------------- +## +## Script name: Rift_WithinHost_Functions_Likelihood.R +## +## Purpose of script: Functions to compute likelihood per model point +## Likelihood of TCID and RNA measures in two separate functions +## which are then combined in model_pointLike +## plot.likelihood.profiles is meant to check that the likelihood peaks at the right place +## when we know parameters value (simulated data) +## +## Author: Helene Cecilia +## +## Date Created: 2020-09-24 + +## Loading Packages ------------------ +library(deSolve) +library(ggplot2) +library(latex2exp) + +## Load source files ------------------ + +## ------------------------------- + +TCID50_normal_log10_ll <- function(data.point, model.point, penalty){ + if(!is.na(model.point[["Vi"]]) & !is.na(model.point[["Vni"]])){ + if(!is.na(data.point[["outcomeTCID"]])){ + if(data.point[["outcomeTCID"]] != LOD.TCID){ + V.good.likelihood <- dnorm(x = data.point[["outcomeTCID"]], + mean = log10(model.point[["Vi"]]), + sd = 1, + log = TRUE) + }else{ + V.good.likelihood <- pnorm(q = LOD.TCID, + mean = log10(model.point[["Vi"]]), + sd = 1, + log.p = TRUE) + } + }else{ + V.good.likelihood <- 0 + } + }else{ + V.good.likelihood <- penalty + } + if(is.na(V.good.likelihood)){ + browser() + } + return(V.good.likelihood) +} + +RNA_normal_log10_ll <- function(data.point, model.point, penalty){ + if(!is.na(model.point[["Vi"]]) & !is.na(model.point[["Vni"]])){ + if(!is.na(data.point[["outcomeRNA"]])){ + if(data.point[["outcomeRNA"]] != LOD.RNA){ + V.likelihood <- dnorm(x = (data.point[["outcomeRNA"]]), + mean = log10(model.point[["Vni"]] + model.point[["Vi"]]), + sd = 1, + log = TRUE) + }else{ + V.likelihood <- pnorm(q = LOD.RNA, + mean = log10(model.point[["Vni"]] + model.point[["Vi"]]), + sd = 1, + log.p = TRUE) + } + }else{ + V.likelihood <- 0 + } + }else{ + V.likelihood <- penalty + } + if(is.na(V.likelihood)){ + browser() + } + return(V.likelihood) +} + +## function to compute the likelihood of one data point +model.point.likelihood <- function(data.point, model.point, log=FALSE){ + V.likelihood <- RNA_normal_log10_ll(data.point, model.point, penalty) + V.good.likelihood <- TCID50_normal_log10_ll(data.point, model.point, penalty) + + likelihood.sum <- V.likelihood + V.good.likelihood + return(ifelse(log, likelihood.sum, exp(likelihood.sum))) +} + +logLik.trajectory <- function(params, model, init.state, data, point.likelihood.function){ + allpars = c(init.state, params, + tstart = 0, tfinal = max(data$xvals), dt = 0.1) + # print(allpars) + # browser() + odeout <- do.call(model, as.list(allpars)) + modelpred <- odeout$ts[match(data$xvals, odeout$ts[,"time"]), c("Vi","Vni")] + # browser() + # print(params) + ll = 0 + if(all(is.na(modelpred$Vi)) & all(is.na(modelpred$Vni))){ + ll <- -1e+10 + # print("traj NA") + }else{ + for (x in 1:nrow(data)) { + data.point <- unlist(data[x, ]) + model.point <- unlist(modelpred[x, ]) + ll <- ll + point.likelihood.function(data.point = data.point, + model.point = model.point, log = TRUE) + } + } + return(ll) +} + +# logLik.creator <- function(model, point.likelihood.function){ +# logLik.wrapper <- function(params, data, init){ +# return(logLik.trajectory(model = model, +# point.likelihood.function = point.likelihood.function, +# params = params, +# init = init, +# data = data)) +# } +# return(logLik.wrapper) +# } + +# Only meant for simulated data (from known parameter values), as a sanity check +plot.likelihood.profiles <- function(parnames, truepars, model, init, simdata, point.likelihood.function, seqpars){ + for(i in parnames){ + # print(i) + LL <- NULL + for(p in seqpars[[i]]){ + # print(p) + theta <- truepars + theta[[i]] <- p + ll <- logLik.trajectory(params = theta, model = model, init.state = init, data = simdata, point.likelihood.function = point.likelihood.function) + LL <- c(LL, ll) + } + plot(seqpars[[i]], LL, xlab = i, ylab = "logLik", main = i) + abline(v = truepars[[i]]) + } +} + + diff --git a/Code/mcmc_wrapper_style/Rift_WithinHost_Functions_MCMC.R b/Code/mcmc_wrapper_style/Rift_WithinHost_Functions_MCMC.R new file mode 100644 index 0000000000000000000000000000000000000000..f249c30b330a71b8b8708c5c60ca67a59816c8ba --- /dev/null +++ b/Code/mcmc_wrapper_style/Rift_WithinHost_Functions_MCMC.R @@ -0,0 +1,188 @@ +## --------------------------- +## +## Script name: +## +## Purpose of script: +## +## Author: Helene Cecilia +## +## Date Created: 2020-10-06 + +## Loading Packages ------------------ +# library() +# +# +# ## Load source files ------------------ +# source('.R') + +## ------------------------------- + +# TO DO : test beta distribution (for params between 0 and 1, to be more informative than uniform) +compute.priors <- function(params, parmin, parmax, log = FALSE){ + parnames <- names(params) + small.step <- 1e-15 + min.allowed <- -60000 + log.sum <- 0 + # names(params) <- parnames + # names(parmin) <- parnames + # names(parmax) <- parnames + for(i in parnames){ #beware if 1/omega is estimated, do not allow 0 + tested.value <- params[[i]] + min <- parmin[[i]] + max <- parmax[[i]] + log.prior <- max(min.allowed, log(punif(tested.value+small.step, min = min, max = max, log=FALSE) - + punif(tested.value-small.step, min = min, max = max, log=FALSE))) + + log.sum <- log.sum + log.prior + } + return(ifelse(log, log.sum, exp(log.sum))) +} + +prior.creator <- function(parmin, parmax, log){ + prior.wrapper <- function(params, log = log){ + return(compute.priors(params = params, + # parnames = parnames, + parmin = parmin, + parmax = parmax, + log = log)) + } + return(prior.wrapper) +} + +compute.posterior <- function(model, model.prior.function, params, parnames, init.state, data, ll.point.function) +{ + names(params) <- parnames + log.prior <- model.prior.function(params = params, log = TRUE) + if (is.finite(log.prior)) { + log.likelihood <- logLik.trajectory(model = model, params = params, + init.state = init.state, data = data, + point.likelihood.function = ll.point.function) + } + else { + log.likelihood <- -Inf + } + log.posterior <- log.prior + log.likelihood + print("prior") + print(log.prior) + print("ll") + print(log.likelihood) + print("ratio") + print(log.prior/log.likelihood) + return(log.posterior) +} + +posterior.creator <- function(model, model.prior.function, ll.point.function, init.state, data){ + posterior.wrapper <- function(params, parnames){ + return(compute.posterior(model = model, model.prior.function = model.prior.function, + params = params, parnames = parnames, init.state = init.state, data = data, + ll.point.function = ll.point.function)) + } + return(posterior.wrapper) +} + +plot.posterior.profiles <- function(parnames, truepars, posterior.function, seqpars){ + for(i in parnames){ + # print(i) + LL <- NULL + for(p in seqpars[[i]]){ + # print(p) + theta <- truepars + theta[[i]] <- p + ll <- posterior.function(params = theta, parnames = parnames) + LL <- c(LL, ll) + } + plot(seqpars[[i]], LL, xlab = i, ylab = "Posterior", main = i) + abline(v = truepars[[i]]) + } +} + +# TO DO : run model outside of logLik (one function does only one thing) +my.mcmc <- function(nbIteration, init.param, sdProposal, calculateLL){ + parnames <- names(init.param) + nbParam <- length(init.param) + + # matrices to store everything + logLik<-rep(0,nbIteration) + accept<-matrix(0,ncol=nbParam,nrow=nbIteration) + parameters<-matrix(0,ncol=nbParam,nrow=nbIteration) + + iteration<-1 + parameters[iteration,] <- init.param #initial value + logLik[iteration]<-calculateLL(params = parameters[iteration,], parnames = parnames) + + for(iteration in 2:nbIteration){ + # print(iteration) + parameters[iteration,]<-parameters[iteration-1,] + logLik[iteration]<-logLik[iteration-1] + for(iParam in 1:nbParam){ + oldParam<-parameters[iteration,iParam] + # browser() + newParam<-oldParam*exp(sdProposal[iParam]*rnorm(1)) # -Inf ; Inf : need to be cautious, prior should be enough to avoid issues, as long as you don't have NA returned + parameters[iteration,iParam]<-newParam + newLogLik<-calculateLL(parameters[iteration,], parnames = parnames) + # you might want to change the order of parameters to avoid correlation problems + # browser() + if(is.finite(newParam)){ + if(log(runif(1))<newLogLik-logLik[iteration]+log(newParam)-log(oldParam)){ + logLik[iteration]<-newLogLik + accept[iteration,iParam]<-1 + }else{ + parameters[iteration,iParam]<-oldParam + } + }else{ # if sdProposal poorly chosen, Inf can be sampled + parameters[iteration,iParam]<-oldParam + } + } + } + return(list(logLik = logLik, accept = accept, parameters = parameters)) +} + +get.optimal.sdProposal <- function(model.posterior, init.param, finetune.Iterations, burnIn = 1, ft.sdProposals, + verbose = 0){ + if (verbose == 1) { browser() } + maxReps = 15 # how often are you going to adjust to get to the right acceptance rate? + lower.bound = .1 # lowest acceptable acceptance + upper.bound = .45 # highest + counter = 0; + decayStart = maxReps / 4; + delta0 = 2.0; + delta = delta0; + while (TRUE) { + counter = counter + 1 + if (counter >= decayStart) delta = delta0 / (counter - decayStart + 1); + print(paste("Fine tuning ",counter)) + # HERE PUT YOUR MCMC-function with inputs#### + tic("mcmcMH") + my_mcmc <- my.mcmc(nbIteration = finetune.Iterations, + init.param = init.param, + sdProposal = ft.sdProposals, + calculateLL = model.posterior) + toc(log = TRUE) + acc.matrix <- my_mcmc$accept + # acc.matrix <- as.data.frame(acc.matrix) + # names(acc.matrix) <- names(init.param) + # browser() + isOK = TRUE; + tmp_ft.sdProposals <- NULL + for (ii in 1:length(init.param)) { + ind = names(init.param)[ii] + if (ft.sdProposals[ind] > 0.0) { + curr_AR = sum(acc.matrix[-c(1:burnIn),ii]) / (finetune.Iterations - burnIn) # Note that here, I assume the out[[3]] contains a matrix of 0 for not accepted and 1 for accepted + # that would be your acceptance matrix. I think fitR gives that too. Otherwise, you'd want to create this using the diff-function of similar (0 if no diff, 1 if there is) + # tricky thing here is that fitR changes all params at the same time. + print(curr_AR) + diff = curr_AR - 0.23; + # Adjust random walk step + tmp_ft.sdProposals <- c(tmp_ft.sdProposals, ft.sdProposals[ind] * ( 1.0 + delta * diff )); + isOK = isOK && (curr_AR >= lower.bound & curr_AR <= upper.bound); + } + } + if (isOK || counter >= maxReps){ + break; # it stops if all parameters have acceptance rates within the upper and lower bounds + }else{ + ft.sdProposals <- tmp_ft.sdProposals + } + print(ft.sdProposals) + } + return(list("mcmc" = my_mcmc, "sd" = ft.sdProposals)) +} \ No newline at end of file diff --git a/Code/mcmc_wrapper_style/Rift_WithinHost_Main_MCMC.R b/Code/mcmc_wrapper_style/Rift_WithinHost_Main_MCMC.R new file mode 100644 index 0000000000000000000000000000000000000000..e4c374ad03e8c2867d6946aaa130f55ba7c3532b --- /dev/null +++ b/Code/mcmc_wrapper_style/Rift_WithinHost_Main_MCMC.R @@ -0,0 +1,104 @@ +## --------------------------- +## +## Script name: +## +## Purpose of script: +## +## Author: Helene Cecilia +## +## Date Created: 2020-10-06 + +rm(list=ls()) + +## Loading Packages ------------------ +library(tictoc) + +## Set Work Directory ------------------------------------------------------ +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # set to source file location +getwd() + +## Load source files ------------------ +source('Rift_WithinHost_Functions_Build_BasicModel.R') +source('Rift_WithinHost_Functions_Likelihood.R') +source("Rift_WithinHost_Functions_MCMC.R") +load("../Output/basic_model_sim_data_params.RData") # run Rift_WithinHost_Main_SimulateData_BasicModel.R if you don't have this file yet +load("../Output/global_params.RData") + +## ------------------------------- +sim.data <- read.csv("../Output/basic_model_sim_data_dead_sheep.csv") # run Rift_WithinHost_Main_SimulateData_BasicModel.R if you don't have this file yet +## ------------------------------- + +# ------- MAIN --------- +basic.model.prior <- prior.creator(parmin = c(b = 1e-9, dI = 0.1, w = 1000, p = 1e-4, dV = 0.1), + parmax = c(b = 1e-7, dI = 15, w = 50000, p = 2e-1, dV = 15), + log = TRUE) #parnames = c("b","dI", "w", "p", "dV"), + +# basic.model.ll <- logLik.creator(model = simulate_basic_model, point.likelihood.function = model.point.likelihood) + +init.state <- Y0 + +#fonction to calculate LL for set of parameters +calculateLL <- posterior.creator(model = simulate_basic_model, model.prior.function = basic.model.prior, + ll.point.function = model.point.likelihood, init.state = init.state, data = sim.data) + +init.param <- c(b = 1e-8, dI = 1, w = 8e3, p = 1e-2, dV = 1) +sdProposal <- c(b = 1e-7, dI = 0.5, w = 2e3, p = 2e-4, dV = 0.5) # dI = dV = 0.5 give good acceptance rates +# you will run nbIteration * nbParam simulations : possibly quite long! +# tic() +# results <- my.mcmc(nbIteration = 1000, +# init.param = init.param, +# sdProposal = sdProposal, +# calculateLL = calculateLL) +# toc(log = TRUE) + +b.seq <- c(seq(5:9)*10**(-9), seq(1:9)*10**(-8), seq(1:9)*10**(-7)) +dI.seq <- c(seq(1,9)*10**(-2), seq(1,9)*10**(-1), seq(1,10)) +dV.seq <- c(seq(4,10), seq(11,15)) +p.seq <- c(seq(0.0005,0.005,0.0005), seq(0.01,0.1, 0.01)) +w.seq <- c(seq(10000, 50000, 1000)) +seqpars <- list() +seqpars[["b"]] <- b.seq +seqpars[["dI"]] <- dI.seq +seqpars[["dV"]] <- dV.seq +seqpars[["p"]] <- p.seq +seqpars[["w"]] <- w.seq + +truepars <- modelpars +plot.posterior.profiles(parnames, truepars, calculateLL, seqpars) + +# tic() +# results <- get.optimal.sdProposal(model.posterior = calculateLL, +# init.param = init.param, +# finetune.Iterations = 500, burnIn = 100, +# ft.sdProposals = sdProposal) +# toc(log = TRUE) +# test <- results$mcmc +# for(i in 1:length(init.param)){ +# plot(test$parameters[,i], main = names(init.param[i]), type = "l") +# print(names(init.param[i])) +# print(summary(test$parameters[,i])) +# print(sum(test$accept[,i])/1000) +# } +# hist(test$logLik) + +# #calculateDIC +# if(nbParam==1) meanParam<-median(parameters[-(1:burnIn)]) else meanParam<-apply(parameters[-(1:burnIn),],2,median) +# +# deviance.meanParam<--2*calculateLL(meanParam) +# meanDeviance<-(-2)*mean(logLik[-(1:burnIn)]) +# pD<-meanDeviance-deviance.meanParam +# DIC<-pD+meanDeviance +# +# +# ########## +# +# +# mat<-matrix(1:6,ncol=2,nrow=3,byrow=TRUE) +# layout(mat,widths=lcm(rep(6,2)),heights=lcm(rep(6,3))) +# par(mar=c(2,2,2,0)) +# +# plot(logLik[-(1:burnIn)],main="LL") +# +# for(parID in 1:nbParam) {plot(parameters[-(1:burnIn),parID],main=parID)} +# +# apply(parameters[-(1:burnIn),],2,quantile,prob=c(0.025,0.5,0.975)) diff --git a/Code/mcmc_wrapper_style/Rift_WithinHost_Main_SimulateData_BasicModel.R b/Code/mcmc_wrapper_style/Rift_WithinHost_Main_SimulateData_BasicModel.R new file mode 100644 index 0000000000000000000000000000000000000000..6496b2a7b4dc9deb58ea442d407986b89a1f70ca --- /dev/null +++ b/Code/mcmc_wrapper_style/Rift_WithinHost_Main_SimulateData_BasicModel.R @@ -0,0 +1,133 @@ +## --------------------------- +## +## Script name: +## +## Purpose of script: +## +## Author: Helene Cecilia +## +## Date Created: 2020-09-24 + +rm(list=ls()) + +## Loading Packages ------------------ +library(deSolve) +library(ggplot2) +library(latex2exp) + +## Set Work Directory ------------------------------------------------------ +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # set to source file location +getwd() + +## Load source files ------------------ +source('Rift_WithinHost_Functions_Build_BasicModel.R') + +## ------------------------------- +load("../Output/global_params.RData") + +within_data <- read.csv("../Data/RITRAdata/RITRA_data_formatted.csv") + +dead.sheep <- within_data[within_data$sheep_id %in% c("X154", "X156", "X157", "X159", "X161", + "X162", "X163", "X164", "X169", "X170", + "X173", "X174"),] +dead.sheep <- dead.sheep[!is.na(dead.sheep$rna_V) | !is.na(dead.sheep$tcid_vp),] +`%notin%` <- Negate(`%in%`) + +survive.sheep <- within_data[within_data$sheep_id %notin% c("X154", "X156", "X157", "X159", "X161", + "X162", "X163", "X164", "X169", "X170", + "X173", "X174"),] + + + +Y0 = c(U = 1e8, I = 0, Vi = 1, Vni = 0) +parnames <- c("b", "dI", "w", "p", "dV") +modelpars = c(b = 8e-8, dI = 5, dV = 10, + p = 0.002, w = 2e4) +save(Y0, parnames, modelpars, file = "../Output/basic_model_sim_data_params.RData") +# names(modelpars) = parnames +xvals = seq(0, 12, 0.1) +allpars = c(Y0, tfinal = max(xvals), tstart = 0, + dt = 0.1, modelpars) +odeout <- do.call(simulate_basic_model, as.list(allpars)) +simres = odeout$ts +# simtimes <- seq(0,7) +simdata = data.frame(simres[match(unique(dead.sheep$dpi), simres[,"time"]), ]) +simdata$simresVi = log10(simdata$Vi) +simdata$simresV = log10(simdata$Vi + simdata$Vni) +simdata = subset(simdata, select = c("time", "simresVi", "simresV")) +colnames(simdata) = c("xvals", "outcomeTCID", "outcomeRNA") + +simdata[simdata$outcomeRNA <= LOD.RNA,]$outcomeRNA = LOD.RNA +simdata[simdata$outcomeTCID <= LOD.TCID,]$outcomeTCID = LOD.TCID + +ggplot() + geom_line(data = simdata, aes(x = xvals, y = outcomeTCID)) + + geom_line(data = simdata, aes(x = xvals, y = outcomeRNA)) + + geom_point(data = dead.sheep, aes(x = dpi, y = tcid_vp), color = "blue") + + geom_point(data = dead.sheep, aes(x = dpi, y = rna_V), color = "orange") + + ylab(TeX('log$_{10}$ RNA')) + + xlab("dpi") + + scale_y_continuous(expand = c(0,0), limits = c(0,11)) + + scale_x_continuous(expand = c(0,0), limits = c(0,13), breaks = c(0,3,6,9,12)) + + theme_classic() + theme(axis.text = element_text(size = 22), + axis.title = element_text(size = 22), + legend.title = element_text(size = 18), + legend.text = element_text(size = 18), + legend.position = "bottom", + legend.box = "vertical") + +write.csv(simdata, file = "../Output/basic_model_sim_data_dead_sheep.csv", row.names=F) +# ggplot() + geom_line(data = simdata, aes(x = xvals, y = outcomeTCID)) + +# geom_line(data = simdata, aes(x = xvals, y = outcomeRNA)) + +# geom_point(data = within_data, aes(x = dpi, y = tcid_vp), color = "blue") + +# geom_point(data = within_data, aes(x = dpi, y = rna_V), color = "orange") + +# ylab(TeX('log$_{10}$ RNA')) + +# xlab("dpi") + +# scale_y_continuous(expand = c(0,0), limits = c(0,11)) + +# scale_x_continuous(expand = c(0,0), limits = c(0,13), breaks = c(0,3,6,9,12)) + +# theme_classic() + theme(axis.text = element_text(size = 22), +# axis.title = element_text(size = 22), +# legend.title = element_text(size = 18), +# legend.text = element_text(size = 18), +# legend.position = "bottom", +# legend.box = "vertical") + +# ggplot() + geom_line(data = simdata, aes(x = xvals, y = outcomeTCID)) + +# geom_line(data = simdata, aes(x = xvals, y = outcomeRNA)) + +# geom_point(data = survive.sheep, aes(x = dpi, y = tcid_vp), color = "blue") + +# geom_point(data = survive.sheep, aes(x = dpi, y = rna_V), color = "orange") + +# ylab(TeX('log$_{10}$ RNA')) + +# xlab("dpi") + +# scale_y_continuous(expand = c(0,0), limits = c(0,11)) + +# scale_x_continuous(expand = c(0,0), limits = c(0,13), breaks = c(0,3,6,9,12)) + +# theme_classic() + theme(axis.text = element_text(size = 22), +# axis.title = element_text(size = 22), +# legend.title = element_text(size = 18), +# legend.text = element_text(size = 18), +# legend.position = "bottom", +# legend.box = "vertical") + +# ggplot() + geom_line(data = simdata, aes(x = xvals, y = outcomeTCID)) + +# geom_line(data = simdata, aes(x = xvals, y = outcomeRNA)) + +# geom_point(data = dead.sheep, aes(x = dpi, y = tcid_vp), color = "blue", alpha = 0.5) + +# geom_point(data = dead.sheep, aes(x = dpi, y = rna_V), color = "orange", alpha = 0.5) + +# geom_point(data = survive.sheep, aes(x = dpi, y = tcid_vp), color = "blue", shape =3, alpha = 0.5) + +# geom_point(data = survive.sheep, aes(x = dpi, y = rna_V), color = "orange", shape = 3, alpha = 0.5) + +# ylab(TeX('log$_{10}$ RNA')) + +# xlab("dpi") + +# scale_y_continuous(expand = c(0,0), limits = c(0,11)) + +# scale_x_continuous(expand = c(0,0), limits = c(0,13), breaks = c(0,3,6,9,12)) + +# theme_classic() + theme(axis.text = element_text(size = 22), +# axis.title = element_text(size = 22), +# legend.title = element_text(size = 18), +# legend.text = element_text(size = 18), +# legend.position = "bottom", +# legend.box = "vertical") + + + +# fitdata$outcome = simdata$outcome + noise * stats::runif(length(simdata$outcome), +# -1, 1) * simdata$outcome +# logvirus = c(log10(pmax(1e-10, modelpred))) # DSAIRM uses a trick to avoid negative values as well?! +# logvirus[(fitdata$outcome <= LOD & (fitdata$outcome - +# logvirus) > 0)] = LOD + diff --git a/Code/mcmc_wrapper_style/Rift_WithinHost_Main_Testing_BasicModel.R b/Code/mcmc_wrapper_style/Rift_WithinHost_Main_Testing_BasicModel.R new file mode 100644 index 0000000000000000000000000000000000000000..c0b7a274282b4c6e8da1f6b1a0c0cdff7b8ebf57 --- /dev/null +++ b/Code/mcmc_wrapper_style/Rift_WithinHost_Main_Testing_BasicModel.R @@ -0,0 +1,51 @@ +## --------------------------- +## +## Script name: Rift_Withinhost_Testing_BasicModel.R +## +## Purpose of script: Tests the MCMC framework on simulated data from the basic model +## +## Author: Helene Cecilia +## +## Date Created: 2020-09-24 + +rm(list=ls()) + +## Loading Packages ------------------ + +## Set Work Directory ------------------------------------------------------ +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # set to source file location +getwd() + +## Load source files ------------------ +source('Rift_WithinHost_Functions_Likelihood.R') +source('Rift_WithinHost_Functions_Build_BasicModel.R') + +load("../Output/basic_model_sim_data_params.RData") # run Rift_WithinHost_Main_SimulateData_BasicModel.R if you don't have this file yet +load("../Output/global_params.RData") + +## ------------------------------- +sim.data <- read.csv("../Output/basic_model_sim_data_dead_sheep.csv") # run Rift_WithinHost_Main_SimulateData_BasicModel.R if you don't have this file yet +## ------------------------------- + +parnames <- parnames +truepars <- modelpars +model <- simulate_basic_model +init <- Y0 +simdata <- read.csv("../Output/basic_model_sim_data_dead_sheep.csv") +point.likelihood <- model.point.likelihood + + +# ---- check likelihood profiles, whether they peak at the right place, have flat parts ---- +b.seq <- c(seq(5:9)*10**(-9), seq(1:9)*10**(-8), seq(1:9)*10**(-7)) +dI.seq <- c(seq(1,9)*10**(-2), seq(1,9)*10**(-1), seq(1,10)) +dV.seq <- c(seq(4,10), seq(11,15)) +p.seq <- c(seq(0.0005,0.005,0.0005), seq(0.01,0.1, 0.01)) +w.seq <- c(seq(10000, 50000, 1000)) +seqpars <- list() +seqpars[["b"]] <- b.seq +seqpars[["dI"]] <- dI.seq +seqpars[["dV"]] <- dV.seq +seqpars[["p"]] <- p.seq +seqpars[["w"]] <- w.seq + +plot.likelihood.profiles(parnames, truepars, model, init, simdata, point.likelihood, seqpars) \ No newline at end of file diff --git a/Code/mcmc_wrapper_style/global_params.R b/Code/mcmc_wrapper_style/global_params.R new file mode 100644 index 0000000000000000000000000000000000000000..1bafe35c2074b0cfdf2b4278350292a76d8a793a --- /dev/null +++ b/Code/mcmc_wrapper_style/global_params.R @@ -0,0 +1,7 @@ +# Global params : limits of detection +LOD.RNA <- 1.48 +LOD.TCID <- 1.55 +# Penalty used in likelihood/posterior when model/prior returns NA/Inf (usually because of impossible parameter values) +penalty <- -100000 + +save(LOD.RNA, LOD.TCID, penalty, file = "../Output/global_params.RData")