Newer
Older
#' The getLRV function
#'
#' This function predicts the pathogen log reduction value for a wastewater or fecal sludge treatment plant sketched using the K2P Sketcher Tool (http://tools.waterpathogens.org/sketcher/)
#' @param sketch A JSON file containing information about the wastewater or fecal sludge treatment plant. This file must be in a very specific format and can be created using the K2P Sketcher Tool (http://tools.waterpathogens.org/sketcher/)
#' @param inFecalSludge Number of pathogens conveyed each year to the treatment plant in fecal sludge
#' @param inSewage Number of pathogens conveyed each year to the treatment plant in sewerage
#' @param pathogenType Pathogen group of interest (Virus, Bacteria, Protozoa, Helminth)
#' @keywords pathogens
#' @export
#' @examples
#' getLRV(mySketch="http://data.waterpathogens.org/dataset/1c681f80-82fa-4ed8-a572-cb98c886781a/resource/a87a552b-2f7c-4267-ac07-526eba1f4b68/download/lubigisewageandfecalsludgetreatmentsystem.json",pathogenType="Virus",inFecalSludge=10000000000,inSewage=10000000000)
#' getLRV(mySketch="http://data.waterpathogens.org/dataset/bd53bbc6-a8f3-4d10-95b6-35f36e274b3c/resource/5cf1c3e0-a28c-4293-81e9-12586dd8941c/download/kirinyawastewatertreatmentplant-jinja6.json",pathogenType="Virus",inFecalSludge=10000000000,inSewage=10000000000)
getLRV<-function(mySketch="http://data.waterpathogens.org/dataset/1c681f80-82fa-4ed8-a572-cb98c886781a/resource/a87a552b-2f7c-4267-ac07-526eba1f4b68/download/lubigisewageandfecalsludgetreatmentsystem.json"
,
myLRVdata="http://data.waterpathogens.org/dataset/eda3c64c-479e-4177-869c-93b3dc247a10/resource/9e172f8f-d8b5-4657-92a4-38da60786327/download/treatmentdata.csv"
,
pathogenType="Virus"
,
inFecalSludge=10000000000
,
inSewage=10000000000
){
k2pdata<-read.csv(myLRVdata,header=T)
suppressWarnings(k2pdata$SQRTlrv<-sqrt(k2pdata$lrv))
suppressWarnings(k2pdata$llrv<-log(k2pdata$lrv))
suppressWarnings(k2pdata$pathogen<-k2pdata$pathogen_group)
suppressWarnings(k2pdata$lhrt<-log(k2pdata$hrt_days))
suppressWarnings(k2pdata$SQRThrt<-sqrt(k2pdata$hrt_days))
suppressWarnings(k2pdata$SQRTht<-sqrt(k2pdata$holdingtime_days))
suppressWarnings(k2pdata$ldepth<-log(k2pdata$depth_meters))
suppressWarnings(k2pdata$temp<-k2pdata$temperature_celsius)
suppressWarnings(k2pdata$temp2<-k2pdata$temperature_celsius^2)
suppressWarnings(k2pdata$temp3<-k2pdata$temperature_celsius^3)
suppressWarnings(k2pdata$ltemp<-log(k2pdata$temperature_celsius))
suppressWarnings(k2pdata$SQRTmoist<-sqrt(k2pdata$moisture_content_percent))
lambdas<-c(Virus=0.2,Bacteria=0.3,Protozoa=0.6,Helminth=0.99) # these lambda values are based on data from the literature (Chauret et al., 1999; Lucena et al., 2004; Ramo et al., 2017; Rose et al., 1996; Tanji et al., 2002; Tsai et al., 1998)
lambda<-as.numeric(lambdas[pathogenType])
results<-data.frame(In_Fecal_Sludge=inFecalSludge,In_Sewage=inSewage,Sludge_Biosolids=NA,Liquid_Effluent=NA,Centralized_LRV=NA)
sketch=jsonlite::read_json(mySketch,simplifyVector = T)
#pData=read.csv(myData,header=T)
sketch$temperature<-as.double(sketch$temperature)
sketch$retentionTime<-as.double(sketch$retentionTime)
sketch$depth<-as.double(sketch$depth)
########((((((((this is the beginning of the old getNodes function))))))))
#res<-suppressWarnings(getNodes(sketch = sketch, nodes = sketch[,-c(2,3)]))
drop <- c("x","y","parents","children")
nodes = sketch[,!(names(sketch) %in% drop)]
nodes$number_inputs<-NA
nodes$number_outputs<-NA
for(i in 1:nrow(sketch)){
nodes$number_inputs[i]<-length(sketch[["parents"]][[i]])
nodes$number_outputs[i]<-length(sketch[["children"]][[i]])
nodes$loading_output=NA
sn<-sketch[,c("parents","children")]
sn$me<-as.character(sketch[,c("name")])
suppressWarnings( # this for loop turns all NULL parents and children to NA values, and it counts the number of parents (numParents) each node has
numParents[i]<-if(is.null(length(sn[i,1][[1]]))){0}else{length(sn[i,1][[1]])}
if(is.null(sn$parents[[i]]) | rlang::is_empty(sn$parents[[i]])){sn[i,1][[1]]<-NA}
if(is.null(sn$children[[i]]) | rlang::is_empty(sn$children[[i]])){sn[i,2][[1]]<-NA}
if(is.na(sn[[1]][[i]])){
j=j+1;rem[j]<-i
}
orph<-which(numParents==0)
arrows<-data.frame(us_node=rep(NA,sum(numParents)),ds_node=rep(NA,sum(numParents)))
sn<-sn[-orph,];rownames(sn)<-1:nrow(sn)
m=1
for(i in 1:nrow(sn)){
arrows$us_node[m]<-sn[i,"parents"][[1]][j]
arrows$ds_node[m]<-sn$me[i]
m=m+1
rownames(nodes)<-nodes$name
arrows$siblings<-nodes[arrows$us_node,"number_outputs"]
arrows$flowtype<-nodes[arrows$ds_node,"matrix"]
arrows$siblings_solid<-NA
arrows$siblings_liquid<-NA
arrows$iamsolid<-NA
for(i in 1:nrow(arrows)){
arrows$siblings_solid[i]<-sum(arrows$flowtype[which(arrows$us_node==arrows$us_node[i])]=="solid")
arrows$siblings_liquid[i]<-sum(arrows$flowtype[which(arrows$us_node==arrows$us_node[i])]=="liquid")
if(arrows$flowtype[i]=="solid"){arrows$iamsolid[i]<-TRUE}else{arrows$iamsolid[i]<-FALSE}
}
####(((((((this is the end of the old getNodes function)))))))
fit_ap<-lm(SQRTlrv ~ SQRThrt+temp+factor(pathogen),data=subset(k2pdata,technology_description=="Anaerobic Pond"))
fit_fp<-lm(SQRTlrv ~ lhrt+temp+factor(pathogen),data=subset(k2pdata,technology_description=="Facultative Pond"|technology_description=="Maturation Pond"))
fit_mp<-lm(SQRTlrv ~ lhrt+temp+factor(pathogen),data=subset(k2pdata,technology_description=="Facultative Pond"|technology_description=="Maturation Pond"))
fit_db<-lm(SQRTlrv ~ SQRTht+SQRTmoist+factor(pathogen),data=subset(k2pdata,technology_description=="Sludge Drying Bed"))
fit_tf<-lm(SQRTlrv ~ factor(pathogen),data=subset(k2pdata,technology_description=="Trickling Filter"))
fit_sd<-lm(SQRTlrv ~ factor(pathogen),data=subset(k2pdata,technology_description=="Sedimentation"))
# find the LRVs for each pathogen group, then solve the DAG!
pathogenGroups<-c("Virus","Bacteria","Protozoa","Helminth")
warnings<-vector(mode="character",length=0)
if(results$In_Fecal_Sludge>0 & any(nodes$subType=="fecal sludge")==FALSE){ #if the onsite system produces fecal sludge but the treatment plant does not accept any
results$To_Surface<-results$In_Fecal_Sludge
warnings[length(warnings)+1]<-"Warning: The onsite sanitation technologies in your system produce fecal sludge, but according to your sketch, the treatment plant does not accept fecal sludge."
results$In_Fecal_Sludge<-0
skipFS<-TRUE
}else{skipFS<-FALSE}
if(results$In_Sewage>0 & any(nodes$subType=="sewerage")==FALSE){ #if the onsite system produces sewerage but the treatment plant does not accept any
results$To_Surface<-results$In_Sewage
warnings[length(warnings)+1]<-"Warning: The onsite sanitation technologies in your system produce sewage, but according to your sketch, the treatment plant does not accept sewage."
results$In_Sewage<-0
skipWW<-TRUE
}else{skipWW<-FALSE}
nodes$loading_output<-NA
arrows$loading<-NA
if(skipFS==FALSE){nodes[nodes$subType=="fecal sludge",]$loading_output<-results$In_Fecal_Sludge}
if(skipWW==FALSE){nodes[nodes$subType=="sewerage",]$loading_output<-results$In_Sewage}
####(((((((this is the beginning of the old estimate, or getLRVs function)))))))
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#nodes<-estimate(nodes,pathogenType=pathogenType)
# transformation of user data to make predictions
nodes$lhrt<-NA
nodes$lhrt[nodes$subType=="anaerobic pond"|nodes$subType=="facultative pond"|nodes$subType=="maturation pond"]<-log(nodes$retentionTime[nodes$subType=="anaerobic pond"|nodes$subType=="facultative pond"|nodes$subType=="maturation pond"])
nodes$SQRThrt<-sqrt(nodes$retentionTime)
nodes$SQRTht<-sqrt(as.double(nodes$holdingTime))
nodes$ldepth<-NA
nodes$ldepth[nodes$subType=="anaerobic pond"|nodes$subType=="facultative pond"|nodes$subType=="maturation pond"]<-log(nodes$depth[nodes$subType=="anaerobic pond"|nodes$subType=="facultative pond"|nodes$subType=="maturation pond"])
nodes$temp<-nodes$temperature
nodes$temp2<-nodes$temperature^2
nodes$temp3<-nodes$temperature^3
nodes$ltemp<-NA
nodes$ltemp[nodes$subType=="sludge drying bed"]<-log(nodes$temperature[nodes$subType=="sludge drying bed"])
nodes$ltemp<-log(nodes$temperature)
nodes$SQRTmoist<-sqrt(as.double(nodes$moistureContent))
nodes$pathogen<-pathogenType
nodes$fit<-0;nodes$upr<-0;nodes$lwr<-0
# execution of models
if(any(nodes$subType=="anaerobic pond")==TRUE){nodes[nodes$subType=="anaerobic pond",c("fit","lwr","upr")]<-predict(fit_ap,nodes[nodes$subType=="anaerobic pond",],interval="confidence")^2}
if(any(nodes$subType=="facultative pond")==TRUE){nodes[nodes$subType=="facultative pond",c("fit","lwr","upr")]<-predict(fit_fp,nodes[nodes$subType=="facultative pond",],interval="confidence")^2}
if(any(nodes$subType=="maturation pond")==TRUE){nodes[nodes$subType=="maturation pond",c("fit","lwr","upr")]<-predict(fit_mp,nodes[nodes$subType=="maturation pond",],interval="confidence")^2}
if(any(nodes$subType=="sludge drying bed")==TRUE){
if(pathogenType=="Virus"){nodes[nodes$subType=="sludge drying bed",c("fit","lwr","upr")]<-1}else{nodes[nodes$subType=="sludge drying bed",c("fit","lwr","upr")]<-predict(fit_db,nodes[nodes$subType=="sludge drying bed",],interval="confidence")^2}
}
if(any(nodes$subType=="trickling filter")==TRUE){
if(pathogenType=="Helminth"){nodes[nodes$subType=="trickling filter",c("fit","lwr","upr")]<-1}else{nodes[nodes$subType=="trickling filter",c("fit","lwr","upr")]<-predict(fit_tf,nodes[nodes$subType=="trickling filter",],interval="confidence")^2}
}
if(any(nodes$subType=="settler/sedimentation")==TRUE){
if(pathogenType=="Protozoa"|pathogenType=="Helminth"){nodes[nodes$subType=="settler/sedimentation",c("fit","lwr","upr")]<-0}else{nodes[nodes$subType=="settler/sedimentation",c("fit","lwr","upr")]<-predict(fit_sd,nodes[nodes$subType=="settler/sedimentation",],interval="confidence")^2}
}
####placeholder LRVs until we get more data into the database####
if(any(nodes$subType=="biogas reactor")==TRUE){nodes[nodes$subType=="biogas reactor",c("fit","lwr","upr")]<-c(1,0,2)}
if(any(nodes$subType=="co-composting")==TRUE){nodes[nodes$subType=="co-composting",c("fit","lwr","upr")]<-c(1,0,2)}
if(any(nodes$subType=="activated sludge")==TRUE){nodes[nodes$subType=="activated sludge",c("fit","lwr","upr")]<-c(1,0,2)}
if(any(nodes$subType=="uasb reactor")==TRUE){nodes[nodes$subType=="uasb reactor",c("fit","lwr","upr")]<-c(1,0,2)}
if(any(nodes$subType=="media filer")==TRUE){nodes[nodes$subType=="media filer",c("fit","lwr","upr")]<-c(1,0,2)}
if(any(nodes$subType=="imhoff tank")==TRUE){nodes[nodes$subType=="imhoff tank",c("fit","lwr","upr")]<-c(1,0,2)}
if(any(nodes$subType=="aerated pond")==TRUE){nodes[nodes$subType=="aerated pond",c("fit","lwr","upr")]<-c(1,0,2)}
if(any(nodes$subType=="ss")==TRUE){nodes[nodes$subType=="ss",c("fit","lwr","upr")]<-c(1,0,2)}
if(any(nodes$subType=="fws")==TRUE){nodes[nodes$subType=="fws",c("fit","lwr","upr")]<-c(1,0,2)}
if(any(nodes$subType=="anaerobic baffled reactor")==TRUE){nodes[nodes$subType=="anaerobic baffled reactor",c("fit","lwr","upr")]<-c(1,0,2)}
####
####(((((((this is the end of the old estimate function)))))))
#######(((((((SOLVE IT SOLVE IT SOLVE IT)))))))
#######(((((((SOLVE IT SOLVE IT SOLVE IT)))))))
#######(((((((SOLVE IT SOLVE IT SOLVE IT)))))))
i=1;j=1;stuck=1 # here, j is an index for the nodes, i is an index for the arrows, stuck prevents the loop from getting infinitely stuck
keepGoing=TRUE
# this next monstrosity of a line finds all arrows who's parents are a source, then divides the parent's source load by the number of siblings to calculate the load in these "special" arrows.
arrows[which(arrows$us_node %in% nodes[nodes$ntype=="source",]$name),]$loading<-nodes[arrows[which(arrows$us_node %in% nodes[nodes$ntype=="source",]$name),]$us_node,]$loading_output/arrows[which(arrows$us_node %in% nodes[nodes$ntype=="source",]$name),]$siblings
while (keepGoing==TRUE){ ##### each loop focuses on a single node (nN[j+1]) and the arrow (i+1) that is going into it
if(any(arrows$ds_node==(nN[j]))==TRUE & is.na(sum(arrows[which(arrows$ds_node==(nN[j])),]$loading))==FALSE){ #2. DO I KNOW THE LOADINGS OF ARROWS COMING INTO ME # if there are any arrows coming into me (Node nN[j])...
nodes[nN[j],]$loading_output=10^(log10(sum(arrows[which(arrows$ds_node==(nN[j])),]$loading))-nodes[nN[j],]$fit) # then get the sum of all arrows coming into me (nN[j]), minus my LRV, to calculate my output loading
if(arrows[i,]$iamsolid==TRUE & arrows[i,]$siblings_liquid>0){ #CALCULATES THE LOADING IN THIS ARROW # if this arrow is a solid but has liquid siblings
arrows[i,]$loading=nodes[arrows[i,]$us_node,]$loading_output*lambda/arrows[i,]$siblings_solid # then use the factor lambda to divide the loading up between liquid vs. solid
if(arrows[i,]$iamsolid==FALSE & arrows[i,]$siblings_solid>0){ #CALCULATES THE LOADING IN THIS ARROW # if this arrow is a liquid but has solid siblings
arrows[i,]$loading=nodes[arrows[i,]$us_node,]$loading_output*(1-lambda)/arrows[i,]$siblings_liquid # then use the factor lambda to divide the loading up between liquid vs. solid
}else{arrows[i,]$loading=nodes[arrows[i,]$us_node,]$loading_output/arrows[i,]$siblings} # otherwise this arrow only has siblings that are the same as it (could be liquid or solid, but they're all the same), so just divide the loading by the number of siblings
stuck<-stuck+1
if(i==(nrow(arrows))){i=1} else {i=i+1}
if(j==(nrow(nodes))){j=1} else {j=j+1} ;arrows;nodes[,c("subType","loading_output")];i;j;nN[j]
if(stuck==1000){keepGoing = FALSE} else {keepGoing = (any(is.na(arrows$loading)) == TRUE | any(is.na(nodes$loading_output)) == TRUE)}
lrv=round(log10(sum(nodes$loading_output[nodes$ntype=="source"])/sum(nodes$loading_output[nodes$ntype=="end use"])),2)
references<-unique(k2pdata[nodes$subType %in% tolower(unique(k2pdata$technology_description)),]$bib_id)
#######(((((((I SOLVED IT!)))))))
#######(((((((I SOLVED IT!)))))))
#######(((((((I SOLVED IT!)))))))
if(any(nodes$matrix=="liquid")){results$Liquid_Effluent<-nodes[nodes$ntype=="end use" & nodes$matrix=="liquid",]$loading_output}else{results$Liquid_Effluent<-0}
if(any(nodes$matrix=="solid")){results$Sludge_Biosolids<-sum(nodes[nodes$ntype=="end use" & nodes$matrix=="solid",]$loading_output)}else{results$Sludge_Biosolids<-0}
loadings=results
loadings$Percent_Liquid<-round(loadings$Liquid_Effluent/(loadings$Liquid_Effluent+loadings$Sludge_Biosolids)*100,1)
loadings$Percent_Solid<-round(loadings$Sludge_Biosolids/(loadings$Liquid_Effluent+loadings$Sludge_Biosolids)*100,1)
arrows$relativeLoading<-arrows$loading/(results$In_Fecal_Sludge+results$In_Sewage)
arrows$us_node_type<-nodes[arrows$us_node,]$subType
arrows$ds_node_type<-nodes[arrows$ds_node,]$subType
#****#****#****#****#
uPs<-paste(unique(nodes$subType[nodes$ntype=="unit process"]), collapse = ', ')
methods<-paste("treats ",nodes$subType[nodes$ntype=="source"][1],if(length(nodes$subType[nodes$ntype=="source"]==2)){paste(" and",nodes$subType[nodes$ntype=="source"][2])},
" using the following technologies: ",
uPs,".",sep=""
);methods
nodes[nodes$ntype=="unit process",]
#****#****#****#****#
solved<-list(arrows=arrows[,c("us_node","ds_node","loading","flowtype","us_node_type","ds_node_type","relativeLoading")],
nodes=nodes[,c("name","ntype","subType","temperature","retentionTime","depth","useCategory","moistureContent","holdingTime","matrix","loading_output","pathogen")],
loadings=loadings,
references=references)
return(solved)