diff --git a/Code/start_fresh/Rift_WithinHost_NLOPTR_Testing.R b/Code/start_fresh/Rift_WithinHost_NLOPTR_Testing.R new file mode 100644 index 0000000000000000000000000000000000000000..6374007fd5954311cc992f1a38fad5c06aa923bc --- /dev/null +++ b/Code/start_fresh/Rift_WithinHost_NLOPTR_Testing.R @@ -0,0 +1,180 @@ +################################################################################## +#fitting inoculum data to models +################################################################################## +rm(list=ls()) #this clears the workspace to make sure no leftover variables are floating around. Not strictly needed +graphics.off(); #close all graphics windows +require(deSolve) #loads ODE solver package +require(nloptr) #for fitting +#require(snow) #for parallel runs on multiple cores + +## Set Work Directory ------------------------------------------------------ +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # set to source file location + +#### ----------------- First part : -------------------------#### +# ------- Fit phenomenological model to viral load data --------# +# -------------- and extract key characteristics ---------------# +# Check out mixmodel-fit-adv.R from Li and Handel 2014 to see more on how to perform fitting with nloptr + +data <- read.csv("../../Data/RITRAdata/RITRA_data_formatted.csv") + +# one.sheep <- data[data$group_sheep_id == "Mosquito.1_1",] +# one.sheep.titer <- one.sheep[!is.na(one.sheep$tcid_vp),] +# IV infection, known inoculum : group A +one.group <- data[data$group == "B",] +one.group <- one.group[!is.na(one.group$tcid_vp) & one.group$tcid_vp != 1.55,] + +fitfunction <- function(pars.all, exp.data, parnames) +{ + names(pars.all)=parnames; #nloptr strips names from parameters, need to re-assign + tvec <- seq(1,15, by = 0.01) + model.pheno <- (2*pars.all["Vp"])/((exp(-pars.all["lambda_g"]*(tvec - pars.all["tp"])))+ + (exp(pars.all["lambda_d"]*(tvec - pars.all["tp"])))) + fit.pheno <- data.frame(time = tvec, titer = model.pheno) + SSR = 0 + for(indiv in exp.data$id){ + indiv.data <- exp.data[exp.data$id == indiv,] + indiv.fit.pheno <- fit.pheno[fit.pheno$time %in% indiv.data$time,] + SSR.indiv = sum((indiv.data$titer - indiv.fit.pheno$titer)^2)/sum(indiv.data$titer); + SSR <- SSR + SSR.indiv + } + return(SSR) +} + + +parnames <- c("Vp","lambda_g","tp","lambda_d") +p0 <- c(10**5, 24*0.18, 3, 24*0.059) +fit.data <- data.frame(time = one.group$dpi, titer = 10**one.group$tcid_vp, id = one.group$group_sheep_id) + +plevel=0; #how much diagnostics to print to the screen. Should be 0 for real runs. +xerr.global=1e-8; xerr.local=1e-6; #used for multi-start solver MLS +xerr.general=1e-7; #used for all other solvers + +steps=-1 #1000; #max steps to take per optimizer iteration - set to -1 if we don't want to use this criterion. Used for both local and global solvers +mtime=2*60 # runtime per bootstrap (in seconds) +lb = rep(0,4) +ub = rep(Inf, 4) +fres <- nloptr(x0=p0,eval_f=fitfunction, lb=lb,ub=ub, + opts=list("algorithm"="NLOPT_LN_SBPLX",xtol_rel=xerr.local,maxeval=steps,maxtime=mtime, + print_level=plevel),exp.data=fit.data, parnames=parnames); + +pars.all <- fres$solution +names(pars.all) <- parnames +tvec <- seq(1,15, by = 0.01) +model.pheno <- (2*pars.all["Vp"])/((exp(-pars.all["lambda_g"]*(tvec - pars.all["tp"])))+ + (exp(pars.all["lambda_d"]*(tvec - pars.all["tp"])))) +pred <- data.frame(time = tvec, pred = model.pheno) +ggplot() + geom_line(data=pred, aes(x = tvec, y = log10(model.pheno))) + + geom_point(data = fit.data, aes(x = time, y = log10(titer))) + ylim(-8,8) + +#simulate_basicvirus_ode from DSAIRM package +function (U = 1e+05, I = 0, V = 10, n = 0, dU = 0, dI = 1, dV = 4, + b = 1e-06, p = 100, g = 1, tstart = 0, tfinal = 50, dt = 0.1) +{ + basicvirus_ode_fct <- function(t, y, parms) { + with(as.list(c(y, parms)), { + dU = n - dU * U - b * V * U + dI = b * V * U - dI * I + dV = p * I - dV * V - b * g * V * U + list(c(dU, dI, dV)) + }) + } + timevec = seq(tstart, tfinal, by = dt) + vars = c(U = U, I = I, V = V) + pars = c(n = n, dU = dU, dI = dI, dV = dV, b = b, p = p, + g = g) + odeout = deSolve::ode(y = vars, parms = pars, times = timevec, + func = basicvirus_ode_fct, atol = 1e-12, rtol = 1e-12) + result <- list() + result$ts <- as.data.frame(odeout) + return(result) +} + +## basicvirusfit from DSAIRM package +function (U = 1e+06, I = 0, V = 1, n = 0, dU = 0, dI = 2, g = 0, + p = 0.001, plow = 1e-04, phigh = 100, psim = 10, b = 0.1, + blow = 0.001, bhigh = 10, bsim = 1e-04, dV = 1, dVlow = 0.01, + dVhigh = 100, dVsim = 5, noise = 0, iter = 1, solvertype = 1, + usesimdata = 0) +{ + basicfitfunction <- function(params, fitdata, Y0, xvals, + fixedpars, fitparnames, LOD) { + names(params) = fitparnames + allpars = c(Y0, params, tfinal = max(xvals), dt = 0.1, + tstart = 0, fixedpars) + odeout <- try(do.call(DSAIRM::simulate_basicvirus_ode, + as.list(allpars))) + modelpred = odeout$ts[match(fitdata$xvals, odeout$ts[, + "time"]), "V"] + logvirus = c(log10(pmax(1e-10, modelpred))) + logvirus[(fitdata$outcome <= LOD & (fitdata$outcome - + logvirus) > 0)] = LOD + return(sum((logvirus - fitdata$outcome)^2)) + } + result <- list() + atolv = 1e-08 + rtolv = 1e-08 + maxsteps = iter + LOD = hayden96flu$LOD[1] + fitdata = subset(hayden96flu, txtime == 200, select = c("HoursPI", + "LogVirusLoad")) + colnames(fitdata) = c("xvals", "outcome") + fitdata$xvals = fitdata$xvals/24 + Y0 = c(U = U, I = I, V = V) + xvals = seq(0, max(fitdata$xvals), 0.1) + if (usesimdata == 1) { + modelpars = c(n = n, dU = dU, dI = dI, dV = dVsim, b = bsim, + p = psim, g = g) + allpars = c(Y0, tfinal = max(fitdata$xvals), tstart = 0, + dt = 0.1, modelpars) + odeout <- do.call(DSAIRM::simulate_basicvirus_ode, as.list(allpars)) + simres = odeout$ts + simdata = data.frame(simres[match(fitdata$xvals, simres[, + "time"]), ]) + simdata$simres = log10(simdata$V) + simdata = subset(simdata, select = c("time", "simres")) + colnames(simdata) = c("xvals", "outcome") + fitdata$outcome = simdata$outcome + noise * stats::runif(length(simdata$outcome), + -1, 1) * simdata$outcome + } + fixedpars = c(n = n, dU = dU, dI = dI, g = g) + par_ini = as.numeric(c(p, b, dV)) + lb = as.numeric(c(plow, blow, dVlow)) + ub = as.numeric(c(phigh, bhigh, dVhigh)) + fitparnames = c("p", "b", "dV") + if (solvertype == 1) { + algname = "NLOPT_LN_COBYLA" + } + if (solvertype == 2) { + algname = "NLOPT_LN_NELDERMEAD" + } + if (solvertype == 3) { + algname = "NLOPT_LN_SBPLX" + } + bestfit = nloptr::nloptr(x0 = par_ini, eval_f = basicfitfunction, + lb = lb, ub = ub, opts = list(algorithm = algname, xtol_rel = 1e-10, + maxeval = maxsteps, print_level = 0), fitdata = fitdata, + Y0 = Y0, xvals = xvals, fixedpars = fixedpars, fitparnames = fitparnames, + LOD = LOD) + params = bestfit$solution + names(params) = fitparnames + modelpars = c(params, fixedpars) + allpars = c(Y0, modelpars, tfinal = max(fitdata$xvals)) + odeout <- do.call(simulate_basicvirus_ode, as.list(allpars)) + simres = odeout$ts + modelpred = simres[match(fitdata$xvals, simres[, "time"]), + "V"] + modelpred = odeout$ts[match(fitdata$xvals, odeout$ts[, "time"]), + "V"] + logvirus = c(log10(pmax(1e-10, modelpred))) + logvirus[(fitdata$outcome <= LOD & (fitdata$outcome - logvirus) > + 0)] = LOD + ssrfinal = (sum((logvirus - fitdata$outcome)^2)) + result$ts = odeout$ts + result$bestpars = params + result$SSR = ssrfinal + fitdata$outcome = 10^fitdata$outcome + fitdata$varnames = "V_data" + colnames(fitdata) = c("xvals", "yvals", "varnames") + result$data = fitdata + return(result) +} \ No newline at end of file diff --git a/Code/start_fresh/Rift_WithinHost_Temperature.R b/Code/start_fresh/Rift_WithinHost_Temperature.R index 6c3a66fa180b5693c18ae7140b8a84580c68c2d9..f2237c8cb0a7122d59e9b36b2ef1b7daa40b1967 100644 --- a/Code/start_fresh/Rift_WithinHost_Temperature.R +++ b/Code/start_fresh/Rift_WithinHost_Temperature.R @@ -24,7 +24,78 @@ getwd() ## ------------------------------- -within_data <- read.csv("../Data/RITRAdata/RITRA_data_formatted.csv") + +# Try the approach of dynamic linear fitting between temperature and tcid +# to do : try with rna and ratio +data <- data[!is.na(data$tcid_vp),] +data$dpi <- seq(1,156) +to.predict <- data[data$dpi > 75,] + +res <- NULL +for(d in as.list(to.predict$dpi)){ # as.list avoids d loosing the Date type + training <- data[data$dpi < d,] # strictly inferior to always have 1 out-of-sample to make prediction + model <- lm(tcid_vp ~ Temperature, data = training) + pred <- predict(model, newdata = to.predict[to.predict$dpi == d,]) + res <- c(res, pred) +} +to.predict$pred_dynamic <- res + + +ggplot(to.predict) + geom_line(aes(x = dpi, y = tcid_vp), col = "blue") + + geom_line(aes(x = dpi, y = pred_dynamic), col = "orange") + +within_data <- read.csv("../../Data/RITRAdata/RITRA_data_formatted.csv") +ggplot(within_data[within_data$tcid_vp != 1.55 & within_data$rna_V != 1.48,]) + geom_point(aes(x = Temperature, y = ratio)) +model <- lm(ratio~Temperature, data = within_data[within_data$tcid_vp != 1.55 & within_data$rna_V != 1.48,]) +summary(model) + +temp_previous_day <- within_data$Temperature[-288] +tcid_present_day <- within_data$tcid_vp[-1] +rna_present_day <- within_data$rna_V[-1] +ratio_present_day <- within_data$ratio[-1] + +df <- data.frame(temp_previous = temp_previous_day, tcid_present = tcid_present_day, + rna_present = rna_present_day, ratio_present = ratio_present_day) +df <- na.omit(df) +df[df$tcid_present == 1.55,]$tcid_present <- runif(n=sum(df$tcid_present == 1.55), min = 0, max = 1.55) +df[df$rna_present == 1.48,]$rna_present <- runif(n=sum(df$rna_present == 1.48), min = 0, max = 1.48) +df$ratio_present <- (10**df$tcid_present) / (10**df$rna_present) + +ggplot(df) + geom_point(aes(x = temp_previous, y = rna_present)) +model <- lm(rna_present ~ temp_previous, data = df) +summary(model) + +ggplot(df) + geom_point(aes(x = temp_previous, y = tcid_present)) + +ggplot(df[df$ratio_present <1,]) + geom_point(aes(x = temp_previous, y = ratio_present)) + +slope_temp_previous <- (within_data$Temperature[-1] - within_data$Temperature[-288])/within_data$Temperature[-288] +slope_tcid_present <- (within_data$tcid_vp[-1] - within_data$tcid_vp[-288]) / within_data$tcid_vp[-288] +slope_rna_present <- (within_data$rna_V[-1] - within_data$rna_V[-288]) / within_data$rna_V[-288] + +df.slope <- data.frame(temp_previous = slope_temp_previous, tcid_present = slope_tcid_present, + rna_present = slope_rna_present, ratio_present = ratio_present_day) + +ggplot(df.slope) + geom_point(aes(x = temp_previous, y = tcid_present)) + + geom_hline(yintercept = 0, col = "green") + geom_vline(xintercept = 0, col = "green") + +ggplot(df.slope) + geom_point(aes(x = temp_previous, y = rna_present)) + + geom_hline(yintercept = 0, col = "green") + geom_vline(xintercept = 0, col = "green") + +ggplot(df.slope[df.slope$ratio_present <1,]) + geom_point(aes(x = temp_previous, y = ratio_present)) + +model <- lm(rna_present ~ temp_previous, data = df.slope) +summary(model) + +within_data$temp_slope_previous <- c(slope_temp_previous,NA) +within_data$slope_rna_present <- c(NA, slope_rna_present) + +group_lm <- group_by(within_data, group_sheep_id) %>% do(model = lm(slope_rna_present ~ temp_slope_previous, .)) + +library(nlme) # for lmList +group_lm <- lmList(slope_rna_present ~ temp_slope_previous | group_sheep_id, na.omit(within_data), pool = FALSE) +coef(summary(group_lm)) +############### test <- within_data %>% group_by(group_sheep_id) %>% dplyr::mutate(max.t = max(Temperature, na.rm = T), max.v = max(rna_V, na.rm = T), max.vp = max(tcid_vp, na.rm = T))