From adce632abdac44397e425c76173666404fd80d60 Mon Sep 17 00:00:00 2001 From: "Hartanto, Margi" <margi.hartanto@wur.nl> Date: Tue, 28 Apr 2020 13:51:22 +0200 Subject: [PATCH] add functions folder and fix horizontal threshold on eQTL histogram --- .gitignore | 1 - 3-eqtl-visualization.R | 8 +- 4-qtl-integration.R | 1 - functions/Auxiliary_functions.R | 102 +++++ functions/Data_preparation.R | 113 ++++++ functions/Heritabilities.R | 312 +++++++++++++++ functions/QTL_mapping.R | 448 +++++++++++++++++++++ functions/QTL_mapping_more.R | 32 ++ functions/QTL_simulation_permutation.R | 256 ++++++++++++ functions/eQTL_based_analyses.R | 126 ++++++ functions/emma_functions.r | 515 +++++++++++++++++++++++++ 11 files changed, 1906 insertions(+), 8 deletions(-) create mode 100644 functions/Auxiliary_functions.R create mode 100644 functions/Data_preparation.R create mode 100644 functions/Heritabilities.R create mode 100644 functions/QTL_mapping.R create mode 100644 functions/QTL_mapping_more.R create mode 100644 functions/QTL_simulation_permutation.R create mode 100644 functions/eQTL_based_analyses.R create mode 100644 functions/emma_functions.r diff --git a/.gitignore b/.gitignore index 16132e0..1b0c992 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,6 @@ .Ruserdata figures files -functions networks qtl-peaks qtl-permutations diff --git a/3-eqtl-visualization.R b/3-eqtl-visualization.R index cf86a1d..cf64978 100644 --- a/3-eqtl-visualization.R +++ b/3-eqtl-visualization.R @@ -20,7 +20,6 @@ library(UpSetR) library(forcats) library(grid) - library(heritability) library(topGO) library(org.At.tair.db) @@ -33,9 +32,6 @@ # library(factoextra) # library(stats) # library(amap) - # - # - # # library(gridExtra) # library(RCy3) # library(corrplot) @@ -344,8 +340,8 @@ ylim(c(0, 100)) + geom_hline(data = hist.threshold, aes(yintercept = threshold), linetype = 'dashed', - color = 'red', - size = .1) + + color = 'black', + size = .5) + scale_x_continuous(breaks=c(5, 10, 15, 20, 25, 30, 35, 40)*10^6,labels=c(5, 10, 15, 20, 25, 30, 35, 40)) tiff(file = paste0("figures/eqtl-hist.tiff"), diff --git a/4-qtl-integration.R b/4-qtl-integration.R index 8a906c7..06ed303 100644 --- a/4-qtl-integration.R +++ b/4-qtl-integration.R @@ -92,7 +92,6 @@ geom_histogram(binwidth = 2000000, right = T, origin = 0, alpha = 1) + #geom_density(alpha = 0.5) + facet_grid(factor(qtl_level, level = c('phQTL', 'mQTL', 'eQTL')) ~ qtl_chromosome, scales="free") + - presentation + scale_fill_manual(values = c('#000000', '#ccbb44', '#228833', '#4477aa', '#cc3311')) + theme(legend.position = "top") + diff --git a/functions/Auxiliary_functions.R b/functions/Auxiliary_functions.R new file mode 100644 index 0000000..c1ac574 --- /dev/null +++ b/functions/Auxiliary_functions.R @@ -0,0 +1,102 @@ +###Linear models + ###Fast summary function + ###This function cannot handle NA's! + fast.summ <- function(input){ + p <- input$rank + rdf <- input$df.residual + Qr <- input$qr + n <- nrow(Qr$qr) + + p1 <- 1L:p + r <- input$residuals + rss <- colSums(r^2) + + resvar <- rss/rdf + R <- chol2inv(Qr$qr[p1, p1, drop = FALSE]) + se <- sqrt(rep(diag(R),each=length(resvar)) * rep(resvar,times=length(diag(R)))) + est <- input$coefficients[Qr$pivot[p1],] + tval <- t(t(est)/se) + + pvals <- round(-log10(2 * pt(abs(tval),rdf, lower.tail = FALSE)),digits=2) + output <- cbind(t(pvals)[,-1],t(round(est,digits=5))[,-1]) + return(output) + } + + + ###Fast linear model, 1 variable + ###Uses fast.summ, can't handle NA's! + fast.lm.1 <- function(traits,variable){ + if(missing(traits)){ stop("specify traits")} + if(missing(variable)){ stop("specify variable")} + + output <- fast.summ(lm(t(traits)~variable)) + return(output) + } + + + ###Fast linear model, 2 variables, additive + fast.lm.2.add <- function(variable1,traits,variable2){ + if(missing(traits)){ stop("specify traits")} + if(missing(variable1)){ stop("specify variable1")} + if(missing(variable2)){ stop("specify variable2")} + + output <- fast.summ(lm(t(traits)~variable1+variable2)) + if(dim(output)[2]==2){ + output <- cbind(output[,1],NA,output[,2],NA) + } + if(dim(output)[2]==4){ + colnames(output) <- c("pval1","pval2","eff1","eff2") + return(output) + } + } + + + ###Fast linear model, 2 variables, interaction + fast.lm.2.int <- function(variable1,traits,variable2){ + if(missing(traits)){ stop("specify traits")} + if(missing(variable1)){ stop("specify variable1")} + if(missing(variable2)){ stop("specify variable2")} + + output <- fast.summ(lm(t(traits)~variable1*variable2)) + if(dim(output)[2]==2){ + output <- cbind(output[,1],NA,NA,output[,2],NA,NA) + } + if(dim(output)[2]==4){ + output <- cbind(output[,1:2],NA,output[,3:4],NA) + } + if(dim(output)[2]==6){ + colnames(output) <- c("pval1","pval2","pvali","eff1","eff2","effi") + return(output) + } + } + + ###Slow linear model for NA handling + lm.1 <- function(data.input,variable){ + lm.tapply <- function(y,x){return(c(diff(tapply(y[x!=0 & !is.na(x)],x[x!=0 & !is.na(x)],mean,na.rm=T)),cor.test(y[x!=0 & !is.na(x)],x[x!=0 & !is.na(x)],na.action=na.exclude())$p.value))} + + out <- tapply(as.numeric(t(data.input)),rep(rownames(data.input),each=ncol(data.input)),lm.tapply,x=variable) + out <- unlist(out) + out <- cbind(LOD=-log10(out[seq(2,length(out),by=2)]),effect=out[seq(1,length(out),by=2)]) + + return(out) + } + + + + ###Permutation function + permutate.traits <- function(trait.matrix){ + perm.list <- rep(c(1:nrow(trait.matrix)),times=ncol(trait.matrix)) + runif(length(trait.matrix),0.25,0.75) + perm.list <- order(perm.list) + perm.data <- matrix(as.numeric(trait.matrix)[perm.list],nrow=nrow(trait.matrix),ncol=ncol(trait.matrix),byrow=TRUE) + return(perm.data) + } + + + ###calculates R-squared as in a linear model + R.sq <- function(x,y){return(cor(x,y,use="na.or.complete")^2)} + + + + + + diff --git a/functions/Data_preparation.R b/functions/Data_preparation.R new file mode 100644 index 0000000..95b9084 --- /dev/null +++ b/functions/Data_preparation.R @@ -0,0 +1,113 @@ +###Wrongly labeled sample function, takes eQTL table output +###v1.1 Fixed marker calling, now markers are re-calculated based on the population used (so the known genetic +### information about the population). + eQTL.WLS <- function(eQTL.table.output,mean.centered.data,strain.map,strain.marker){ + if(missing(eQTL.table.output)){ stop("need output from eQTL.table")} + if(missing(mean.centered.data)){ stop("need mean centered data: columns: trait, strain, ratio, sample")} + if(missing(strain.map)){ stop("strain.map missing; need the population map")} + if(missing(strain.marker)){ stop("strain.marker missing; need information about the markers: name, chromosome, and basepair")} + + ###Force strain.marker column names into format + if(ncol(strain.marker) != 3){ stop("need 3 columns in strain.marker")} + colnames(strain.marker) <- c("name","chromosome","basepair") + strain.marker <- mutate(strain.marker,marker=1:nrow(strain.map)) + + ###Force mean.centered.data column names into format + if(ncol(mean.centered.data) != 4){ stop("need 4 columns in mean.centered.data")} + colnames(mean.centered.data) <- c("trait", "strain", "ratio", "sample") + + ###Convert genotypes to list + geno.list <- mutate(as.data.frame(strain.map),marker=c(1:nrow(strain.map))) %>% + gather(key=strain_genotype,value=genotype,-marker) %>% + filter(!is.na(genotype)) + + ###Take only the cis eQTL + eQTL.table.output <- filter(eQTL.table.output,qtl_type=="cis") %>% + select(trait,qtl_chromosome,qtl_bp,qtl_significance,qtl_effect,gene_chromosome,gene_bp,gene_WBID) %>% + merge(strain.marker,by.x=2,by.y=2) %>% + group_by(trait,qtl_significance,qtl_effect) %>% + mutate(closeness=abs(qtl_bp-basepair)) %>% + mutate(closest=ifelse(closeness==min(closeness),"yep","nope")) %>% + filter(!duplicated(closest),closest=="yep") %>% + as.data.frame() %>% + select(marker,trait,qtl_chromosome,qtl_bp,qtl_significance,qtl_effect,gene_chromosome,gene_bp,gene_WBID) %>% + merge(mean.centered.data) + + ###Use the cis QTL to calculate the correlation between the predicted (eQTL) effect versus the expression in the strain + out <- NULL + for(i in 1:length(unique(geno.list$strain_genotype))){ + out.tmp <- merge(eQTL.table.output,filter(geno.list,strain_genotype==unique(geno.list$strain_genotype)[i])) %>% + group_by(sample,strain,strain_genotype) %>% + summarise(correlation = cor(x=genotype*ratio,y=qtl_effect,use="complete.obs")) %>% + as.data.frame() + out <- rbind(out,out.tmp) + } + out <- group_by(out,sample) %>% + mutate(rank.map=length(correlation)-rank(correlation)+1) %>% + as.data.frame() + + ###Return the correlations + return(out) + } + +###Plotting function for WLS data + plot.eQTL.WLS <- function(eQTL.WLS.output,filename){ + if(missing(filename )){ filename <- "WLS"} + + ###Write a pdf with the correlations for manual inspection + pdf(paste(filename,".pdf",sep=""),width=18,height=8) + for(i in 1:length(unique(eQTL.WLS.output$sample))){ + to.plot <- filter(eQTL.WLS.output,sample==unique(eQTL.WLS.output$sample)[i]) + plot(y=to.plot$correlation,x=to.plot$rank.map,main=unique(eQTL.WLS.output$sample)[i],axes=F,xlab="Predicted strain",ylab="Correlation",pch=19,col=rgb(0,0,0,alpha=0.5)) + axis(1,c(1:nrow(to.plot)),labels=to.plot$strain_genotype[order(to.plot$rank.map)],las=2,cex.axis=0.5) + axis(2,c(-1,-0.5,0,0.5,1),c(-1,-0.5,0,0.5,1),las=2) + text(y=seq(max(to.plot$correlation),mean(to.plot$correlation),length=10),x=0.75*nrow(to.plot),labels=apply(to.plot[order(to.plot$rank.map),][1:10,c(3,4)],1,paste,collapse=" ")) + box() + } + dev.off() + } + + + +###Data preparation + QTL.data.prep <- function(trait.matrix,strain.trait,strain.map,strain.marker){ + if(missing(trait.matrix)){ stop("requires trait data, traits per row and strains per column")} + if(missing(strain.trait)){ stop("requires the strain names matching the trait data")} + if(missing(strain.map)){ stop("requires the genetic map of the population")} + if(missing(strain.marker)){ stop("requires the marker info (name, chromosome, basepair) of the map")} + if(ncol(trait.matrix) != length(strain.trait)){ stop("the number of strains does not correspond with the trait matrix")} + if(sum(tolower(strain.trait) %in% tolower(colnames(strain.map))) < length(strain.trait)){ + warning("not all strains are in the strain.map file")} + if(nrow(strain.map) != nrow(strain.marker)){ stop("the number of markers (strain.map) does not equal the number of marker descriptions (strain.marker)")} + if(ncol(strain.marker) != 3){ stop("make sure the strain.marker file contains a column with the marker name, chromosome, and bp location")} + strain.map <- data.matrix(strain.map) + + ###Make matrix + map.nu <- matrix(NA,nrow=nrow(strain.map),ncol=length(strain.trait)) + for(j in 1:length(strain.trait)){ + map.nu[,j] <- strain.map[,tolower(colnames(strain.map)) == tolower(strain.trait[j])] + } + colnames(map.nu) <- strain.trait + rownames(map.nu) <- rownames(strain.map) + + ###add rownames to the trait matrix + if(length(rownames(trait.matrix)) == 0){ + rownames(trait.matrix) <- 1:nrow(trait.matrix) + } + + ###Make output + output <- NULL + output[[1]] <- trait.matrix + output[[2]] <- map.nu + output[[3]] <- data.frame(strain.marker) + names(output) <- c("Trait","Map","Marker") + + ###return output + return(output) + } + + + + + + \ No newline at end of file diff --git a/functions/Heritabilities.R b/functions/Heritabilities.R new file mode 100644 index 0000000..bf0fcd2 --- /dev/null +++ b/functions/Heritabilities.R @@ -0,0 +1,312 @@ + ###calculates H2 based on REML (part of H2.broad) + H2.REML <- function(input,Vg.factor){ + if(missing(Vg.factor)){Vg.factor <- 1} + + REML <- lmer(trait.value ~ 1 + (1|strain),data=input) + Variations <- as.data.frame(VarCorr(REML))$vcov + + H2 <- c((Variations[1]*Vg.factor)/sum(Variations*c(Vg.factor,1)),(Variations*c(Vg.factor,1))) + names(H2) <- c("H2_REML","Vg","Ve") + return(H2) + } + + ###Calculates H2 based on ANOVA (part of H2.broad) + H2.ANOVA <- function(input,Vg.factor){ + if(missing(Vg.factor)){Vg.factor <- 1} + + ANOVA <- anova(lm(trait.value~strain,data=input)) + Variations <- ANOVA[[2]] + + H2 <- c((Variations[1]*Vg.factor)/sum(Variations*c(Vg.factor,1)),(Variations*c(Vg.factor,1))) + names(H2) <- c("H2_ANOVA","Vg","Ve") + return(H2) + } + + ###Calculates H2 as described in Keurentjes, 2007, in case error has to be estimated on limited number of strains + ###Permutation is based on Brem, 2005 + H2.Keurentjes <- function(input,parental.strain,Vg.factor){ + if(missing(Vg.factor)){Vg.factor <- 1} + + selc.parental <- input$strain %in% parental.strain + + ###Ve is estimated based on parental replicates + VAR.PL <- tapply(input$trait.value[selc.parental],as.character(unlist(input$strain[selc.parental])),var) + N.PL <- tapply(input$trait.value[selc.parental],as.character(unlist(input$strain[selc.parental])),length) + VAR.E <- sum(VAR.PL*(N.PL-1))/sum((N.PL-1)) + + ##Vt is estimated based on RILs + VAR.T <- var(input$trait.value[!selc.parental]) + + H2 <- c(((VAR.T-VAR.E)*Vg.factor)/VAR.T,((VAR.T-VAR.E)*Vg.factor),VAR.E) + names(H2) <- c("H2_keurentjes","Vg","Ve") + return(H2) + } + + ###Wrapper function for broad-sense heritability + H2.broad <- function(trait.value,strain,trait,method,parental.strain,n.perm,Vg.factor){ + if(missing(trait.value)){ stop("requires trait value")} + if(missing(strain)){ stop("requires strain values")} + if(missing(trait)){ trait <- rep("A",length(trait.value))} + if(missing(method)){ method <- "ANOVA"} + if(!method %in% c("ANOVA","REML","Keurentjes_2007")){ stop("method should be either ANOVA, REML, or Keurentjes_2007")} + if(method=="Keurentjes_2007"){ + if(missing(parental.strain)){ stop("when using Keurentjes_2007, parental strain names should be provided")} + if(sum(parental.strain %in% strain) != length(parental.strain)){ + stop("not all parental strains are in the data")} + } + if(missing(n.perm)){ n.perm <- 1000} + if(missing(Vg.factor)){ Vg.factor <- 1} + + ###Prep the data + input <- data.frame(cbind(trait.value=trait.value,strain=strain)) + input$trait.value <- as.numeric(as.character(unlist(input$trait.value))) + input$strain <- as.factor(as.character(unlist(input$strain))) + + input <- split(input,trait) + + if(method == "ANOVA"){ + H2 <- t(sapply(input,H2.ANOVA,Vg.factor)) + + perm <- NULL + for(i in 1:n.perm){ + input.perm <- lapply(input,function(x){x[,1] <- x[order(runif(nrow(x))),1];return(x)}) + H2.perm <- t(sapply(input.perm,H2.ANOVA,Vg.factor)) + perm <- rbind(perm,H2.perm[,1]) + } + + H2 <- cbind(H2,FDR0.05_ANOVA=apply(perm,2,function(x){sort(x)[ceiling(n.perm*0.95)]})) + } + if(method == "REML"){ + H2 <- t(sapply(input,H2.REML,Vg.factor)) + + perm <- NULL + for(i in 1:n.perm){ + input.perm <- lapply(input,function(x){x[,1] <- x[order(runif(nrow(x))),1];return(x)}) + H2.perm <- t(sapply(input.perm,H2.REML,Vg.factor)) + perm <- rbind(perm,H2.perm[,1]) + } + + H2 <- cbind(H2,FDR0.05_REML=apply(perm,2,function(x){sort(x)[ceiling(n.perm*0.95)]})) + } + if(method == "Keurentjes_2007"){ + H2 <- t(sapply(input,H2.Keurentjes,parental.strain = parental.strain,Vg.factor)) + + perm <- NULL + for(i in 1:n.perm){ + input.perm <- lapply(input,function(x){x[,1] <- x[order(runif(nrow(x))),1];return(x)}) + H2.perm <- t(sapply(input.perm,H2.Keurentjes,parental.strain = parental.strain,Vg.factor)) + perm <- rbind(perm,H2.perm[,1]) + } + + H2 <- cbind(H2,FDR0.05_Keurentjes=apply(perm,2,function(x){sort(x)[ceiling(n.perm*0.95)]})) + } + + output <- as.data.frame(cbind(trait=names(input),round(H2,digits=3))) + output[,1] <- as.character(unlist(output[,1])) + output[,2] <- as.numeric(as.character(unlist(output[,2]))) + output[,3] <- as.numeric(as.character(unlist(output[,3]))) + output[,4] <- as.numeric(as.character(unlist(output[,4]))) + output[,5] <- as.numeric(as.character(unlist(output[,5]))) + + return(output) + } + + + + + ###Transgression, calculated and permutated as in Brem, 2005 + transgression.calc <- function(trait.value,strain,trait,parental.strain,sigmas,n.perm){ + if(missing(trait.value)){ stop("provide trait values")} + if(missing(strain)){ stop("provide strain names")} + if(missing(trait)){ trait <- rep("A",length(trait_value))} + if(missing(parental.strain)){ stop("provide names of two parental strains")} + if(missing(sigmas)){ sigmas <- 2} + if(missing(n.perm)){ n.perm <- 1000} + + ###Convenience function for transgression + transg.sapply <- function(input,parental.strain,sigmas){ + selc.parental1 <- tolower(input$strain) %in% tolower(parental.strain[1]) + selc.parental2 <- tolower(input$strain) %in% tolower(parental.strain[2]) + + VAR.PL <- tapply(input$trait.value[(selc.parental1|selc.parental2)],input$strain[(selc.parental1|selc.parental2)],var) + N.PL <- tapply(input$trait.value[(selc.parental1|selc.parental2)],input$strain[(selc.parental1|selc.parental2)],length) + SD.PL <- (sum(VAR.PL*(N.PL-1))/sum((N.PL-1)))^2 + + MEAN.PL1 <- mean(input$trait.value[selc.parental1],na.rm=T) + MEAN.PL2 <- mean(input$trait.value[selc.parental2],na.rm=T) + + tmp <- tapply(input$trait.value[!(selc.parental1|selc.parental2)],input$strain[!(selc.parental1|selc.parental2)],mean,na.rm=T) + + transg <- sum(tmp < (min(c(MEAN.PL1,MEAN.PL2))-sigmas*SD.PL) | + tmp > (max(c(MEAN.PL1,MEAN.PL2))+sigmas*SD.PL)) + + return(transg) + } + + ###Prep the data + input <- data.frame(cbind(trait.value=trait.value,strain=strain)) + input$trait.value <- as.numeric(as.character(unlist(input$trait.value))) + input$strain <- as.character(unlist(input$strain)) + + input <- split(input,trait) + + transg <- sapply(input,transg.sapply,parental.strain = parental.strain, sigmas = sigmas) + + perm <- NULL + for(i in 1:n.perm){ + input.perm <- lapply(input,function(x){x[,1] <- x[order(runif(nrow(x))),1];return(x)}) + transg.perm <- sapply(input.perm,transg.sapply,parental.strain = parental.strain, sigmas = sigmas) + perm <- rbind(perm,transg.perm) + } + FDR0.1 <- apply(perm,2,function(x){sort(x)[ceiling(n.perm*0.90)]}) + FDR0.05 <- apply(perm,2,function(x){sort(x)[ceiling(n.perm*0.95)]}) + FDR0.01 <- apply(perm,2,function(x){sort(x)[ceiling(n.perm*0.99)]}) + + output <- as.data.frame(cbind(trait=names(input),n_strains_transgression=transg,FDR0.1=FDR0.1,FDR0.05=FDR0.05,FDR0.01=FDR0.01)) + output[,1] <- as.character(unlist(output[,1])) + output[,2] <- as.numeric(as.character(unlist(output[,2]))) + output[,3] <- as.numeric(as.character(unlist(output[,3]))) + output[,4] <- as.numeric(as.character(unlist(output[,4]))) + output[,5] <- as.numeric(as.character(unlist(output[,5]))) + return(output) + } + + + + + + ###Narrow sense heritabilities as calculated in Rockman, 2010. (see also Speed et al, 2012, American journal of human genetics). + h2.emma <- function(input,strain.map,kinship.matrix,Vg.factor){ + if(missing(input)){ stop("provide a vector with the phenotype, matching the strain.map")} + if(missing(strain.map)){ stop("provide a matrix with genotypes (encoded as -1, 1), columns strains, rows markers")} + if(ncol(strain.map) != length(input)){ stop("phenotype vector does not match strain.map")} + if(missing(kinship.matrix)){ + strain.map[strain.map==0 & !is.na(strain.map)] <- 0.5 + strain.map[strain.map==-1 & !is.na(strain.map)] <- 0 + strain.map[is.na(strain.map)] <- 0.5 + kinship.matrix <- emma.kinship(snps=strain.map) + } + if(missing(Vg.factor)){ Vg.factor <- 1} + + ###Calculate Vg and Ve + emmaREML <- emma.REML.t(ys=input,xs=strain.map,K=kinship.matrix) + + ###Calculate heritability + Vg <- sum(emmaREML$vgs,na.rm=T) + Ve <- sum(emmaREML$ves,na.rm=T) + + h2 <- c((Vg*Vg.factor)/((Vg*Vg.factor)+Ve),Vg,Ve) + names(h2) <- c("h2_emma","Vg","Ve") + + return(h2) + } + + ###narrow sense heritabilities as calculated by heritability package, from Kruijer, Boer & Malosetti, 2015 + h2.REML <- function(input,strain.names,kinship.matrix,Vg.factor){ + if(missing(input)){ stop("provide a vector with the phenotype, matching the strain.map")} + if(missing(strain.names)){ stop("provide a matrix with strain names matching the phenotype")} + if(length(strain.names) != length(input)){ stop("phenotype vector does not match strain.map")} + if(missing(Vg.factor)){ Vg.factor <- 1} + + ###Calculate Vg and Ve + markerREML <- tryCatch({marker_h2(data.vector=input,geno.vector=strain.names,K=kinship.matrix,max.iter=40)}, + error=function(e){cat("ERROR :",conditionMessage(e), "\n"); markerREML <- data.frame(cbind(va=NA,ve=NA)); return(markerREML)}) + + ###Calculate heritability + Vg <- markerREML$va + Ve <- markerREML$ve + + h2 <- c((Vg*Vg.factor)/((Vg*Vg.factor)+Ve),Vg,Ve) + names(h2) <- c("h2_REML","Vg","Ve") + + return(h2) + } + + + h2.narrow <- function(map1.output,method,n.perm,Vg.factor,small){ + if(missing(map1.output)){ stop("requires map1 file")} + if(missing(method)){ method <- "REML"} + if(!method %in% c("emma","REML")){ stop("only emma and REML are currently supported")} + if(missing(n.perm)){ n.perm <- 1000} + if(missing(Vg.factor)){ Vg.factor <- 1} + if(missing(small)){ small <- FALSE} + + if(class(map1.output[[1]])=="numeric"){ + small <- TRUE + } + + if(small){ + map1.output$Trait[2,] <- map1.output$Trait[1,] + } + + ###start calculating + if(method == "emma"){ + ###calculate kinship (handy in case of many traits) + tmp <- map1.output$Map + tmp[tmp==0 & !is.na(tmp)] <- 0.5 + tmp[tmp==-1 & !is.na(tmp)] <- 0 + tmp[is.na(tmp)] <- 0.5 + kinship.matrix <- emma.kinship(snps=tmp) + + h2 <- t(apply(map1.output$Trait,1,h2.emma,strain.map=map1.output$Map,kinship.matrix=kinship.matrix,Vg.factor=Vg.factor)) + + ###permutation + perm <- NULL + + map1.perm <- as.list(NULL) + map1.perm[[1]] <- map1.output$Trait + map1.perm[[2]] <- map1.output$Map + names(map1.perm) <- c("Trait","Map") + + for(i in 1:n.perm){ + map1.perm$Trait <- t(apply(map1.perm$Trait,1,function(x){x <- x[order(runif(length(x)))];return(x)})) + h2.perm <- t(apply(map1.perm$Trait,1,h2.emma,strain.map=map1.perm$Map,kinship.matrix=kinship.matrix,Vg.factor=Vg.factor)) + perm <- rbind(perm,h2.perm[,1]) + } + + h2 <- cbind(h2,FDR0.05_emma=apply(perm,2,function(x){sort(x)[ceiling(n.perm*0.95)]})) + } + + if(method == "REML"){ + ###calculate kinship (handy in case of many traits) + tmp <- map1.output$Map + tmp[tmp==0 & !is.na(tmp)] <- 0.5 + tmp[tmp==-1 & !is.na(tmp)] <- 0 + tmp[is.na(tmp)] <- 0.5 + kinship.matrix <- emma.kinship(snps=tmp) + rownames(kinship.matrix) <- colnames(map1.output$Map) + colnames(kinship.matrix) <- colnames(map1.output$Map) + + h2 <- t(apply(map1.output$Trait,1,h2.REML,strain.names=colnames(map1.output$Map),kinship.matrix=kinship.matrix,Vg.factor=Vg.factor)) + + ###permutation + perm <- NULL + + map1.perm <- as.list(NULL) + map1.perm[[1]] <- map1.output$Trait + map1.perm[[2]] <- map1.output$Map + names(map1.perm) <- c("Trait","Map") + + for(i in 1:n.perm){ + map1.perm$Trait <- t(apply(map1.perm$Trait,1,function(x){x <- x[order(runif(length(x)))];return(x)})) + h2.perm <- t(apply(map1.perm$Trait,1,h2.REML,strain.names=colnames(map1.perm$Map),kinship.matrix=kinship.matrix,Vg.factor=Vg.factor)) + perm <- rbind(perm,h2.perm[,1]) + } + + h2 <- cbind(h2,FDR0.05_REML=apply(perm,2,function(x){sort(x,na.last = FALSE)[ceiling(n.perm*0.95)]})) + } + output <- as.data.frame(cbind(trait=rownames(h2),round(h2,digits=5))) + output[,1] <- as.character(unlist(output[,1])) + output[,2] <- as.numeric(as.character(unlist(output[,2]))) + output[,3] <- as.numeric(as.character(unlist(output[,3]))) + output[,4] <- as.numeric(as.character(unlist(output[,4]))) + output[,5] <- as.numeric(as.character(unlist(output[,5]))) + + if(small){ + output <- output[1,] + } + return(output) + } + + + \ No newline at end of file diff --git a/functions/QTL_mapping.R b/functions/QTL_mapping.R new file mode 100644 index 0000000..84401f3 --- /dev/null +++ b/functions/QTL_mapping.R @@ -0,0 +1,448 @@ +###eQTL mapping functions + ### Map function, 1 marker, genotypes per column, trait per row + ### map.1 is optimized for dataset in which many markers are un-informative + ### these markers are not mapped but the p-vals and effect are copied from the previous marker. + map.1 <- function(trait.matrix,strain.map,strain.marker){ + if(missing(trait.matrix)){ stop("specify trait.matrix, matrix with traits in rows and genotypes in columns")} + if(missing(strain.map)){ stop("specify strain.map (genetic map)")} + if(ncol(strain.map) != dim(as.matrix(trait.matrix))[2] & + ncol(strain.map) != prod(dim(as.matrix(trait.matrix)))){ stop("number of strains is not equal to number of genotypes in map")} + if(missing(strain.marker)){ strain.marker <- data.frame(cbind(name=1:dim(strain.map)[1],chromosome=NA,bp=1:dim(strain.map)[1])) + rownames(strain.marker) <- 1:dim(strain.map)[1]} + + ###Check for NAs + NAs <- sum(is.na(trait.matrix))>0 | sum(is.na(strain.map))>0 + + small <- FALSE + if(dim(as.matrix(trait.matrix))[1] ==1){ + trait.matrix <- rbind(trait.matrix,1) + small <- TRUE + } + + eff.out <- matrix(NA,nrow(trait.matrix),nrow(strain.map)) + pval.out <- matrix(NA,nrow(trait.matrix),nrow(strain.map)) + + for(i in 1:nrow(strain.map)){ + noseg <- length(unique(strain.map[i,])) ==1 + if(noseg){ + output.tmp <- matrix(c(0,0),byrow=TRUE,ncol=2,nrow=nrow(trait.matrix)) + } + if( i == 1 & !noseg){ + if(!NAs){output.tmp <- fast.lm.1(trait.matrix,strain.map[i,])} + if(NAs){output.tmp <- lm.1(trait.matrix,strain.map[i,])} + } + if( i != 1 & sum(abs(strain.map[i-1,]-strain.map[i,]),na.rm=T) != 0 & !noseg){ + if(!NAs){output.tmp <- fast.lm.1(trait.matrix,strain.map[i,])} + if(NAs){output.tmp <- lm.1(trait.matrix,strain.map[i,])} + } + if( i != 1 & sum(abs(strain.map[i-1,]-strain.map[i,]),na.rm=T) == 0 & !noseg){ + output.tmp <- output.tmp + } + eff.out[,i] <- output.tmp[,2] + pval.out[,i] <- output.tmp[,1] + } + if(!small){ + colnames(eff.out) <- rownames(strain.marker) + rownames(eff.out) <- rownames(trait.matrix) + colnames(pval.out) <- rownames(strain.marker) + rownames(pval.out) <- rownames(trait.matrix) + } + if(small){ + traitnames <- rownames(trait.matrix)[1] + eff.out <- matrix(eff.out[1,],nrow=1) + rownames(eff.out) <- traitnames + colnames(eff.out) <- rownames(strain.map) + pval.out <- matrix(pval.out[1,],nrow=1) + rownames(pval.out) <- traitnames + colnames(pval.out) <- rownames(strain.map) + trait.matrix <- matrix(trait.matrix[1,],nrow=1) + rownames(trait.matrix) <- traitnames + colnames(trait.matrix) <- colnames(strain.map) + } + + output <- NULL; output <- as.list(output) + output[[1]] <- round(pval.out,digits=2) + output[[2]] <- round(eff.out,digits=3) + output[[3]] <- trait.matrix + output[[4]] <- strain.map + output[[5]] <- strain.marker + names(output) <- c("LOD","Effect","Trait","Map","Marker") + return(output) + } + + + + + +###Process the mapping data + ###This transforms the relevant output to a dataframe, making handling in dplyr and plotting using ggplot easier + + ###This takes output of map.1 functions (list file with LOD, effect, Trait, and Map + ###And converts it to a list. + ###Optimized (a bit) by generating an empty matrix and filling the columns one-by-one. + ###added an option for single traits + mapping.to.list <- function(map1.output,small){ + if(missing(map1.output)){ stop("Missing map1.output ([[LOD]],[[Effect]],[[Trait]],[[Map]],[[Marker]])")} + if(missing(small)){ small <- FALSE} + + if(class(map1.output[[1]])=="numeric"){ + small <- TRUE + } + + if(small){ + ###Split up to long format + output <- matrix(NA,length(map1.output[[1]]),6) + output[,1] <- rep(rownames(map1.output[[3]])[1],each=length(map1.output[[1]])) + output[,2] <- as.character(unlist(map1.output[[5]][,2])) + output[,3] <- as.numeric(as.character(unlist(map1.output[[5]][,3]))) + output[,4] <- c(1:length(map1.output[[1]])) + output[,5] <- c(map1.output[[1]]) + output[,6] <- c(map1.output[[2]]) + output <- as.data.frame(output) + } + if(!small){ + ###Split up to long format + output <- matrix(NA,ncol(map1.output[[1]])*nrow(map1.output[[1]]),6) + output[,1] <- rep(rownames(map1.output[[1]]),each=ncol(map1.output[[1]])) + output[,2] <- rep(as.character(unlist(map1.output[[5]][,2])),times=nrow(map1.output[[1]])) + output[,3] <- rep(as.numeric(as.character(unlist(map1.output[[5]][,3]))),times=nrow(map1.output[[1]])) + output[,4] <- rep(c(1:ncol(map1.output[[1]])),times=nrow(map1.output[[1]])) + output[,5] <- c(t(map1.output[[1]])) + output[,6] <- c(t(map1.output[[2]])) + output <- as.data.frame(output) + } + + ###overwrite dataframe formats + output[,1] <- as.character(unlist(output[,1])) + output[,2] <- as.character(unlist(output[,2])) + output[,3] <- as.numeric(as.character(unlist(output[,3]))) + output[,4] <- as.numeric(as.character(unlist(output[,4]))) + output[,5] <- as.numeric(as.character(unlist(output[,5]))) + output[,6] <- as.numeric(as.character(unlist(output[,6]))) + colnames(output) <- c("trait","qtl_chromosome","qtl_bp","qtl_marker","qtl_significance","qtl_effect") + + ###Return + return(output) + } + + + ###This takes output of mapping.to.list function and identiefies peaks and confidence intervals v1.3 + ###Based on a given threshold and a 1.5 LOD-drop. + + ###v1.2 made the function more efficient, the peak borders are selected in one processing + ###v1.3 fixed the problem calling the peak if the peak borders the edge of the chromosome + ###v1.4 fixed larger then, smaller then bug + ###v1.5 fixed 'incompatible types' bug; NA is not recognized as numeric + + + peak.finder <- function(list.file,threshold,LOD.drop){ + if(missing(list.file)){ stop("Missing list.file (trait, qtl_chromosome, qtl_bp, qtl_marker, qtl_significance, qtl_effect)")} + if(missing(threshold)){ stop("Missing threshold")} + if(missing(LOD.drop)){ LOD.drop <- 1.5} + + ###locate the peak + peaks <- dplyr::filter(list.file, qtl_significance >= threshold-LOD.drop) %>% + dplyr::group_by(trait,qtl_chromosome) %>% + dplyr::filter(qtl_significance == max(qtl_significance),max(qtl_significance)>=threshold) %>% + dplyr::filter(abs(qtl_marker - mean(qtl_marker)) == min(abs(qtl_marker - mean(qtl_marker)))) %>% + dplyr::filter(qtl_marker == min(qtl_marker)) %>% + as.data.frame() %>% + dplyr::mutate(qtl_peak=qtl_marker) + + ###locate left boundary + peaks_left <- dplyr::filter(list.file,trait %in% unique(peaks$trait)) %>% + dplyr::group_by(trait,qtl_chromosome) %>% + dplyr::mutate(Border=ifelse((qtl_significance < max(qtl_significance)-LOD.drop & max(qtl_significance)>=threshold),T, + ifelse((qtl_bp == min(qtl_bp) & min(qtl_bp) %in% qtl_bp[qtl_significance >= max(qtl_significance)-LOD.drop & max(qtl_significance)>=threshold]),T, + ifelse((qtl_bp == max(qtl_bp) & max(qtl_bp) %in% qtl_bp[qtl_significance >= max(qtl_significance)-LOD.drop & max(qtl_significance)>=threshold]),T,F))), + peakloc=qtl_marker[which.max(qtl_significance)]) %>% + dplyr::filter(Border) %>% + dplyr::mutate(qtl_marker_left=as.numeric(ifelse(qtl_marker == max(qtl_marker[qtl_marker <= peakloc]),qtl_marker,NA)), + qtl_bp_left=as.numeric(ifelse(qtl_bp == max(qtl_bp[qtl_marker <= peakloc]),qtl_bp,NA)), + qtl_marker_right=as.numeric(ifelse(qtl_marker == min(qtl_marker[qtl_marker >= peakloc]),qtl_marker,NA)), + qtl_bp_right=as.numeric(ifelse(qtl_bp == min(qtl_bp[qtl_marker >= peakloc]),qtl_bp,NA))) %>% + dplyr::filter(!is.na(qtl_bp_left) | !is.na(qtl_bp_right)) %>% + dplyr::select(-Border,-peakloc) + + peaks_right <- dplyr::filter(peaks_left,!is.na(qtl_bp_right)) + peaks_left <- dplyr::filter(peaks_left,!is.na(qtl_bp_left)) + + ###Merge the findings + peaks <- cbind(peaks,peaks_left$qtl_marker_left,peaks_left$qtl_bp_left,peaks_right$qtl_marker_right,peaks_right$qtl_bp_right) + ###Prepare input + output <- dplyr::mutate(list.file,qtl_peak = NA,qtl_marker_left = NA,qtl_bp_left = NA,qtl_marker_right = NA,qtl_bp_right = NA) + ###Merge peaks and input + output[match(paste(peaks[,1],peaks[,2],peaks[,3]),paste(output[,1],output[,2],output[,3])),7:11] <- peaks[,7:11] + output <- dplyr::mutate(output,qtl_bp_left=ifelse(qtl_bp_left > qtl_bp,qtl_bp,qtl_bp_left),qtl_bp_right=ifelse(qtl_bp_right<qtl_bp,qtl_bp,qtl_bp_right)) + + ###return called peaks + return(output) + } + + + ###Makes a table with eQTL peaks v1.1 + ###peak.list.file: output from peak.finder + ###trait.annotation: gene ID's of the mapped traits + ###cis.trans.window: Cis-trans windowd can be defined based on physical distance "physical", based on LOD-drop interval "LOD.drop", or on both "both" + + eQTL.table <- function(peak.list.file,trait.annotation,cis.trans.window,cis.trans.window.size,cis.trans.window.lodinterval,colnames.trait.annotations){ + if(missing(peak.list.file)){ stop("need an eQTL list file")} + if(missing(trait.annotation)){ stop("need annotation file with order: trait, gene_chromosome, gene_bp, gene_WBID, gene_sequence_name, gene_public_name")} + if(missing(cis.trans.window)){ cis.trans.window <- "both"} + if(!cis.trans.window %in% c("physical","LOD.drop","both")){ stop("cis.trans.window should be in physical, LOD.drop, or both")} + if(missing(cis.trans.window.size)){ cis.trans.window.size <- 1000000} + if(missing(colnames.trait.annotations)){ colnames.trait.annotations <- c("trait","gene_chromosome","gene_bp","gene_WBID","gene_sequence_name","gene_public_name")} + + ###Rename colnames traits + colnames(trait.annotation) <- colnames.trait.annotations + + ###Take only the peaks + if(cis.trans.window =="physical"){ + output <- dplyr::filter(peak.list.file,!is.na(qtl_peak)) %>% + merge(trait.annotation,by.x=1,by.y=1) %>% + dplyr::mutate(qtl_type = ifelse((qtl_chromosome==gene_chromosome & abs(qtl_bp-gene_bp) < cis.trans.window.size),"cis","trans")) + } + + if(cis.trans.window =="LOD.drop"){ + output <- dplyr::filter(peak.list.file,!is.na(qtl_peak)) %>% + merge(trait.annotation,by.x=1,by.y=1) %>% + dplyr::mutate(qtl_type = ifelse((qtl_chromosome==gene_chromosome & (gene_bp > qtl_bp_left & gene_bp < qtl_bp_right)),"cis","trans")) + } + + if(cis.trans.window =="both"){ + output <- dplyr::filter(peak.list.file,!is.na(qtl_peak)) %>% + merge(trait.annotation,by.x=1,by.y=1) %>% + dplyr::mutate(qtl_type = ifelse((qtl_chromosome==gene_chromosome & abs(qtl_bp-gene_bp) < cis.trans.window.size) | + (qtl_chromosome==gene_chromosome & (gene_bp > qtl_bp_left & gene_bp < qtl_bp_right)),"cis","trans")) + } + + ###Return file + return(output) + } + + + ###Makes a table with eQTL peaks for plotting + ###this function adds points to faithfully indicate the chromosome boundaries (standard is C. elegans)) + prep.ggplot.eQTL.table <- function(eQTL.table.file,condition,chromosomes,chromosome_size){ + if(missing(eQTL.table.file)){ stop("need a eQTL.table.file")} + if(missing(condition)){ condition <- "ct"} + if(missing(chromosomes)){ chromosomes <- c("I","II","III","IV","V","X")} + if(missing(chromosome_size)){ chromosome_size <- c(15072434,15279421,13783801,17493829,20924180,17718942)} + if(length(chromosomes) != length(chromosome_size)){ stop("chromosome names and sizes should have the same length")} + + output <- dplyr::filter(eQTL.table.file, gene_chromosome %in% chromosomes) %>% + dplyr::mutate(gene_chromosome = factor(gene_chromosome,levels=rev(chromosomes))) %>% + dplyr::mutate(qtl_chromosome = factor(qtl_chromosome,levels=chromosomes),condition=condition) %>% + dplyr::mutate(qtl_type = factor(qtl_type,levels=rev(sort(unique(qtl_type))))) + + ###Chromosome sizes cheat + cheatpoints <- output[rep(1,times=length(chromosomes)*2),] + for(i in which(unlist(lapply(output,class)) == "factor")){ + cheatpoints[,i] <- as.character(unlist(cheatpoints[,i])) + } + cheatpoints[1:nrow(cheatpoints),unlist(lapply(output,class)) != "character"] <- 0 + cheatpoints[1:nrow(cheatpoints),unlist(lapply(output,class)) != "numeric"] <- "" + cheatpoints$qtl_chromosome <- as.character(rep(chromosomes,times=2)) + cheatpoints$gene_chromosome <- as.character(rep(chromosomes,times=2)) + cheatpoints$qtl_bp <- as.numeric(as.character(c(chromosome_size,rep(1,times=length(chromosomes))))) + cheatpoints$gene_bp <- as.numeric(as.character(c(chromosome_size,rep(1,times=length(chromosomes))))) + cheatpoints$trait <- paste(as.character(rep(chromosomes,times=2)),rep(c(1,2),each=length(chromosomes))) + + + ###combine + output <- rbind(output,cheatpoints) + + ###return + return(output) + } + + + ################################################################################################################################################################ + ### Add.Rsquared(R2.map,QTL.prep.input) v1.1 + ### This function will add the sum of squares of the used model to a QTL table output, it also calculates the r-squared of the model + ### obligatory input + ### R2.map: output of the eQTL.table function (requires marker numbers and trait names (as in th eprep.for.QTL function)) + ### QTL.prep.input: output of the prep.for.QTL function (list, first matrix is the data, second matrix the corresponding map) + ### v1.1 fixed a bug where, if the marker was a factor, wrong data was selected + ### c("LOD","Effect","Trait","Map","Marker") + + eQTL.table.addR2 <- function(eQTL.table.file,QTL.prep.file){ + if(missing(eQTL.table.file)){ stop("Give a QTL summary table, should contain marker locations (qtl_marker, qtl_marker_1, qtl_marker_2) and trait names")} + if(missing(QTL.prep.file)){ stop("Give the input on which was mapped (output of QTL.data.prep)")} + + ###True if single marker mapping + if("qtl_marker" %in% colnames(eQTL.table.file)){ + output <- NULL + for(i in 1:nrow(eQTL.table.file)){ + ###get marker + mrk <- QTL.prep.file$Map[as.numeric(as.character(unlist(eQTL.table.file$qtl_marker[i]))),] + ###get expression + y <- unlist(QTL.prep.file$Trait[rownames(QTL.prep.file$Trait)==as.character(unlist(eQTL.table.file$trait[i])),]) + ###run model + output <- c(output,R.sq(y,mrk)) + } + output <- data.frame(cbind(eQTL.table.file,qtl_R2_sm=output)) + } + + ###True if multiple marker mapping + if("marker_1" %in% colnames(eQTL.table.file) & "marker_2" %in% colnames(eQTL.table.file)){ + output <- cbind(eQTL.table.file,SS_int_mrk1=NA,SS_int_mrk2=NA,SS_int=NA,SS_int_res=NA,SS_add_mrk1=NA,SS_add_mrk2=NA,SS_add_res=NA) + for(i in 1:nrow(eQTL.table.file)){ + ###get marker + mrk1 <- QTL.prep.file$genotype.map[eQTL.table.file$marker_1[i],] + mrk2 <- QTL.prep.file$genotype.map[eQTL.table.file$marker_2[i],] + ###get expression + y <- unlist(QTL.prep.file$trait.matrix[rownames(QTL.prep.file$trait.matrix)==eQTL.table.file$trait[i],]) + ###run model + output[i,colnames(output)%in%c("SS_int_mrk1","SS_int_mrk2","SS_int","SS_int_res")] <- anova(lm(y ~ mrk1*mrk2))$"Sum Sq" + output[i,colnames(output)%in%c("SS_add_mrk1","SS_add_mrk2","SS_add_res")] <- anova(lm(y ~ mrk1+mrk2))$"Sum Sq" + + } + output <- dplyr::mutate(output,r2_add=(SS_add_mrk1+SS_add_mrk2)/(SS_add_mrk1+SS_add_mrk2+SS_add_res),r2_int=SS_int/(SS_int_mrk1+SS_int_mrk2+SS_int+SS_int_res)) + } + return(output) + } + + + + prep.ggplot.QTL.profile <- function(QTL.peak.output,map1.output,trait){ + output <- list() + + output[[1]] <- QTL.peak.output[as.character(unlist(QTL.peak.output$trait))==trait,] + names(output)[1] <- "QTL_profile" + tmp <- output[[1]]$qtl_peak[!is.na(output[[1]]$qtl_peak)] + + if(length(tmp)>0){ + for(i in 1:length(tmp)){ + output[[i+1]] <- cbind(trait=trait, + map1.output$Marker[tmp[i],], + strain=colnames(map1.output$Trait), + genotype=map1.output$Map[tmp[i],], + trait_value=map1.output$Trait[rownames(map1.output$Trait)==trait,], + R_squared=R.sq(x=map1.output$Map[tmp[i],],y=map1.output$Trait[rownames(map1.output$Trait)==trait,])) + names(output)[i+1] <- paste("QTLsplit_",as.character(unlist(map1.output$Marker[tmp[i],1])),sep="_") + } + } + return(output) + } + + + map.2 <- function(trait.matrix,strain.map,strain.marker,method){ + if(missing(trait.matrix)){ stop("specify trait.matrix, matrix with traits in rows and genotypes in columns")} + if(missing(strain.map)){ stop("specify strain.map (genetic map)")} + if(ncol(strain.map) != dim(as.matrix(trait.matrix))[2] & + ncol(strain.map) != prod(dim(as.matrix(trait.matrix)))){ stop("number of strains is not equal to number of genotypes in map")} + if(missing(strain.marker)){ strain.marker <- data.frame(cbind(name=1:dim(strain.map)[1],chromosome=NA,bp=1:dim(strain.map)[1])) + rownames(strain.marker) <- 1:dim(strain.map)[1]} + if(missing(method)){ method <- "Additive"} + if(!method %in% c("Additive","Interaction")){ stop("method should be Additive or Interaction")} + + + small <- FALSE + if(dim(as.matrix(trait.matrix))[1] ==1){ + trait.matrix <- rbind(trait.matrix,1) + small <- TRUE + } + + ###Check for NAs, and remove them (script is too slow) + NAs <- sum(is.na(trait.matrix))>0 | sum(is.na(strain.map))>0 + if(NAs){ + warning("NAs in trait.matrix will be replaced by the mean value of that trait, the NAs in strain.map will be replaced by 0") + for(i in 1:nrow(trait.matrix)){ + trait.matrix[i,is.na(trait.matrix[i,])] <- mean(trait.matrix[,i],na.rm=T) + } + strain.map[is.na(strain.map)] <- 0 + } + + #map it + variable1 <- cbind(as.numeric(t(strain.map)),rep(1:nrow(strain.map),each=ncol(strain.map))) + tmp <- list() + + for(i in 1:nrow(strain.map)){ + variable2 <- strain.map[i,] + if(method=="Additive"){ + tmp[[i]] <- tapply(variable1[,1],variable1[,2],fast.lm.2.add,traits=trait.matrix,variable2=variable2) + } + if(method =="Interaction"){ + tmp[[i]] <- tapply(variable1[,1],variable1[,2],fast.lm.2.int,traits=trait.matrix,variable2=variable2) + } + tmp[[i]] <- do.call("rbind",tmp[[i]]) + tmp[[i]] <- cbind(marker_1=rep(unique(variable1[,2]),each=nrow(trait.matrix)),marker_2=i,tmp[[i]]) + } + + tmp <- do.call("rbind",tmp) + + output <- list() + output[[1]] <- tmp + output[[2]] <- trait.matrix + output[[3]] <- strain.map + output[[4]] <- strain.marker + names(output) <- c(method,"Trait","Map","Marker") + + return(output) + } + + + map.2.prune <- function(map.2.output,pval.min = 0.05,distance = 1e6,method = "fdr"){ + if(missing(map.2.output)){ stop("require output from map.2")} + + if(ncol(map.2.output[[1]]) == 6){ + map.2.output[[1]] <- map.2.output[[1]][map.2.output[[1]][,1] < map.2.output[[1]][,2],] + map.2.output[[1]] <- cbind(map.2.output[[1]], + pval1_adj=round(-log10(p.adjust(10^-map.2.output[[1]][,3],method=method)),digits=2), + pval2_adj=round(-log10(p.adjust(10^-map.2.output[[1]][,4],method=method)),digits=2)) + map.2.output[[1]] <- map.2.output[[1]][(map.2.output[[1]][,3] > -log10(pval.min) | map.2.output[[1]][,4] > -log10(pval.min)) & + !is.na(map.2.output[[1]][,3]) & !is.na(map.2.output[[1]][,4]),] + map.2.output[[1]] <- data.frame(cbind(map.2.output[[1]], + trait=rownames(map.2.output[[1]]), + marker_1_chromosome=as.character(unlist(map.2.output$Marker[map.2.output[[1]][,1],2])), + marker_1_position=map.2.output$Marker[map.2.output[[1]][,1],3], + marker_2_chromosome=as.character(unlist(map.2.output$Marker[map.2.output[[1]][,2],2])), + marker_2_position=map.2.output$Marker[map.2.output[[1]][,2],3])) + for(i in c(1:8,11,13)){ + map.2.output[[1]][,i] <- as.numeric(as.character(unlist(map.2.output[[1]][,i]))) + } + for(i in c(9,10,12)){ + map.2.output[[1]][,i] <- as.character(unlist(map.2.output[[1]][,i])) + } + map.2.output[[1]] <- map.2.output[[1]][map.2.output[[1]]$marker_1_chromosome != map.2.output[[1]]$marker_2_chromosome | + abs(map.2.output[[1]]$marker_1_position - map.2.output[[1]]$marker_2_position) > distance,] + } + + if(ncol(map.2.output[[1]]) == 8){ + map.2.output[[1]] <- map.2.output[[1]][map.2.output[[1]][,1] < map.2.output[[1]][,2],] + map.2.output[[1]] <- cbind(map.2.output[[1]], + pval1_adj=round(-log10(p.adjust(10^-map.2.output[[1]][,3],method=method)),digits=2), + pval2_adj=round(-log10(p.adjust(10^-map.2.output[[1]][,4],method=method)),digits=2), + pvali_adj=round(-log10(p.adjust(10^-map.2.output[[1]][,5],method=method)),digits=2)) + map.2.output[[1]] <- map.2.output[[1]][(map.2.output[[1]][,3] > -log10(pval.min) | map.2.output[[1]][,4] > -log10(pval.min) | map.2.output[[1]][,5] > -log10(pval.min)) & + !is.na(map.2.output[[1]][,3]) & !is.na(map.2.output[[1]][,4]) & !is.na(map.2.output[[1]][,5]),] + map.2.output[[1]] <- data.frame(cbind(map.2.output[[1]], + trait=rownames(map.2.output[[1]]), + marker_1_chromosome=as.character(unlist(map.2.output$Marker[map.2.output[[1]][,1],2])), + marker_1_position=map.2.output$Marker[map.2.output[[1]][,1],3], + marker_2_chromosome=as.character(unlist(map.2.output$Marker[map.2.output[[1]][,2],2])), + marker_2_position=map.2.output$Marker[map.2.output[[1]][,2],3])) + for(i in c(1:10,13,15)){ + map.2.output[[1]][,i] <- as.numeric(as.character(unlist(map.2.output[[1]][,i]))) + } + for(i in c(11,12,14)){ + map.2.output[[1]][,i] <- as.character(unlist(map.2.output[[1]][,i])) + } + map.2.output[[1]] <- map.2.output[[1]][map.2.output[[1]]$marker_1_chromosome != map.2.output[[1]]$marker_2_chromosome | + abs(map.2.output[[1]]$marker_1_position - map.2.output[[1]]$marker_2_position) > distance,] + } + + return(map.2.output) + } + + + + + + + + + + + diff --git a/functions/QTL_mapping_more.R b/functions/QTL_mapping_more.R new file mode 100644 index 0000000..ec38948 --- /dev/null +++ b/functions/QTL_mapping_more.R @@ -0,0 +1,32 @@ +###interactions between multiple peaks + +map.1.interactions <- function(eQTL.table.file,map1.output){ + if(missing(eQTL.table.file)){ stop("needs output from eQTL.table")} + if(missing(map1.output)){ stop("needs the map1 output file")} + + traits <- eQTL.table.file$trait[duplicated(eQTL.table.file$trait)] + + output <- NULL + for(i in 1:length(traits)){ + traitrow <- which(rownames(map1.output$Trait) == traits[i]) + maprows <- eQTL.table.file$qtl_peak[eQTL.table.file$trait == traits[i]] + + lm_model_additive <- paste("lm(map1.output$Trait[",traitrow,",] ~ ",paste(paste("map1.output$Map[",maprows,",]",sep=""),collapse=" + "),")",sep="") + lm_model_interaction <- paste("lm(map1.output$Trait[",traitrow,",] ~ ",paste(paste("map1.output$Map[",maprows,",]",sep=""),collapse=" * "),")",sep="") + + output_additive <- summary(eval(parse(text=lm_model_additive))) + output_interaction <- summary(eval(parse(text=lm_model_interaction))) + + output <- rbind(output,cbind(trait=traits[i],model="additive",markers=rownames(output_additive[[4]])[-1],effect=output_additive[[4]][,1][-1],significance=output_additive[[4]][,4][-1],Rsquared=output_additive[[9]])) + output <- rbind(output,cbind(trait=traits[i],model="interaction",markers=rownames(output_interaction[[4]])[-1],effect=output_interaction[[4]][,1][-1],significance=output_interaction[[4]][,4][-1],Rsquared=output_interaction[[9]])) + } + output[,3] <- gsub("map1.output$Map[","",output[,3],fixed=TRUE) + output[,3] <- gsub(", ]","",output[,3],fixed=TRUE) + rownames(output) <- c(1:nrow(output)) + output <- data.frame(output) + for(i in c(1:3)){output[,i] <- as.character(unlist(output[,i]))} + for(i in c(4:6)){output[,i] <- as.numeric(as.character(unlist(output[,i])))} + output <- cbind(output,p_val=-log10(output[,5])) + + return(output) + } \ No newline at end of file diff --git a/functions/QTL_simulation_permutation.R b/functions/QTL_simulation_permutation.R new file mode 100644 index 0000000..217fc87 --- /dev/null +++ b/functions/QTL_simulation_permutation.R @@ -0,0 +1,256 @@ + ###Permutation 1 marker mapping, genotypes per column, traits per row + map.1.perm <- function(trait.matrix,strain.map,strain.marker,n.perm){ + if(missing(trait.matrix)){ stop("specify trait.matrix, matrix with traits in rows and genotypes in columns")} + if(missing(strain.map)){ stop("specify strain.map (genetic map)")} + if(ncol(strain.map) != dim(as.matrix(trait.matrix))[2] & + ncol(strain.map) != prod(dim(as.matrix(trait.matrix)))){ stop("number of strains is not equal to number of genotypes in map")} + if(missing(strain.marker)){ strain.marker <- data.frame(cbind(name=1:dim(strain.map)[1],chromosome=NA,bp=1:dim(strain.map)[1])) + rownames(strain.marker) <- 1:dim(strain.map)[1]} + if(missing(n.perm)){ n.perm <- 1000} + if(n.perm == 1 & dim(as.matrix(trait.matrix))[2] ==1){ stop("cannot conduct 1 permutation on 1 trait, set n.perm > 1")} + + + NAs <- sum(is.na(trait.matrix))>0 | sum(is.na(strain.map))>0 + + small <- FALSE + if(dim(as.matrix(trait.matrix))[1] ==1){ + trait.matrix <- rbind(trait.matrix,1) + small <- TRUE + } + + trait.matrix <- trait.matrix[rep(1:nrow(trait.matrix),each=n.perm),] + perm.data <- permutate.traits(trait.matrix) + rownames(perm.data) <- paste(rownames(trait.matrix),1:n.perm,sep="_") + + if(small){ + perm.data <- perm.data[1:n.perm,] + } + + eff.out <- matrix(NA,nrow(trait.matrix),nrow(strain.map)) + pval.out <- matrix(NA,nrow(trait.matrix),nrow(strain.map)) + + for(i in 1:nrow(strain.map)){ + noseg <- length(unique(strain.map[i,])) ==1 + if(noseg){ + output.tmp <- matrix(c(0,0),byrow=TRUE,ncol=2,nrow=nrow(perm.data)) + } + if( i == 1 & !noseg){ + if(!NAs){output.tmp <- fast.lm.1(perm.data,strain.map[i,])} + if(NAs){output.tmp <- lm.1(perm.data,strain.map[i,])} + } + if( i != 1 & sum(abs(strain.map[i-1,]-strain.map[i,]),na.rm=T) != 0 & !noseg){ + if(!NAs){output.tmp <- fast.lm.1(perm.data,strain.map[i,])} + if(NAs){output.tmp <- lm.1(perm.data,strain.map[i,])} + } + if( i != 1 & sum(abs(strain.map[i-1,]-strain.map[i,]),na.rm=T) == 0 & !noseg){ + output.tmp <- output.tmp + } + eff.out[,i] <- output.tmp[,2] + pval.out[,i] <- output.tmp[,1] + } + + colnames(eff.out) <- rownames(strain.marker) + rownames(eff.out) <- rownames(trait.matrix) + colnames(pval.out) <- rownames(strain.marker) + rownames(pval.out) <- rownames(trait.matrix) + output <- NULL; output <- as.list(output) + output[[1]] <- round(pval.out,digits=2) + output[[2]] <- round(eff.out,digits=3) + output[[3]] <- perm.data + output[[4]] <- strain.map + output[[5]] <- strain.marker + names(output) <- c("LOD","Effect","Trait_perm","Map","Marker") + return(output) + } + + ###Based on Benjamini and Yakuteili multiple testing under dependency + map.perm.fdr <- function(map1.output,filenames.perm,FDR_dir,q.value,small){ + if(missing(map1.output)){ stop("Missing map1.output ([[LOD]],[[Effect]],[[Trait]],[[Map]],[[Marker]])")} + if(missing(filenames.perm)){ stop("Need a vector with filenames for permutation")} + if(missing(FDR_dir)){ FDR_dir <- getwd()} + if(missing(q.value)){ q.value <- 0.025} + if(missing(small)){ small <- FALSE} + + output <- as.list(NULL) + + output[[2]] <- matrix(0,ncol=length(filenames.perm),nrow=5000) + rownames(output[[2]]) <- seq(0.1,500,by=0.1) + colnames(output[[2]]) <- filenames.perm + + ###in case of one trait + if(small){ + if(length(filenames.perm) > 1){ + stop("needs one permutatation file, with n permutations") + } + tmp <- load(file=paste(FDR_dir,filenames.perm,sep="")) + ###No NA's + tmp <- get(tmp)[[1]] + tmp[is.na(tmp)] <- 0.1 + + ###no Non-traits + tmp <- tmp[rownames(tmp) != "",] + output[[2]] <- apply(tmp,1,max,na.rm=T) + + if((length(output[[2]]) * q.value) < 10){ + warning(paste("inadvisable to calculate q=",q.value,"based on ",length(output[[2]])," permutations. Increase to at least ", ceiling(10/q.value))) + } + + ###take the q-cutoff, *10 is to be consistent with eQTL methodology (is per 0.1) + output[[1]] <- rev(sort(output[[2]]))[ceiling(q.value*length(output[[2]]))]*10 + + ###again, to be consistent; + output[[3]] <- mean(output[[2]],na.rm = TRUE) + + ###real data + output[[4]] <- max(map1.output$LOD,na.rm=TRUE) + + names(output) <- c("Significance_threshold","False_discoveries","False_discoveries_average","Real_discoveries") + } + if(!small){ + for(i in 1:length(filenames.perm)){ + tmp <- load(file=paste(FDR_dir,filenames.perm[i],sep="")) + ###No NA's + tmp <- get(tmp)[[1]] + tmp[is.na(tmp)] <- 0.1 + + tmp <- table(apply(round(tmp,digits=1),1,max,na.rm=T)) + + output[[2]][(as.numeric(names(tmp))*10),i] <- tmp + output[[2]][,i] <- rev(cumsum(rev(output[[2]][,i]))) + } + ###Take the average over permutations + output[[3]] <- apply(output[[2]],1,mean) + + ###Calculate the real discoveries, no NA's + tmp <- map1.output$LOD + tmp[is.na(tmp)] <- 0.1 + + RDS.tmp <- table(round(apply(tmp,1,max,na.rm=T),digits=1)) + RDS <- rep(0,times=5000); RDS[(as.numeric(names(RDS.tmp))*10)] <- RDS.tmp + RDS <- rev(cumsum(rev(RDS))) + + output[[4]] <- RDS + + output[[1]] <- which(round(((dim(map1.output$LOD)[1]-RDS)/dim(map1.output$LOD)[1])*q.value*log10(dim(map1.output$LOD)[1]),digits=3)-round(output[[3]]/output[[4]],digits=3)>0)[1] + names(output) <- c("Significance_threshold","False_discoveries","False_discoveries_average","Real_discoveries") + } + + ###Return + return(output) + } + +###auxiliary function for data simulation +simulate.data.per.marker <- function(strain.map,strain.marker,n_per_marker,sigma_peak,sigma_error){ + sim.map.file <- list() + + sim.map.file[[2]] <- data.matrix(strain.map) + types <- unique(as.numeric(as.character(unlist(sim.map.file[[2]])))) + sim.map.file[[2]] <- sim.map.file[[2]][rep(1:nrow(sim.map.file[[2]]),each=n_per_marker),] + + sim.map.file[[1]] <- sim.map.file[[2]] + sim.map.file[[1]][sim.map.file[[2]]==types[1]] <- sigma_peak + sim.map.file[[1]][sim.map.file[[2]]==types[2]] <- 0 + sim.map.file[[1]] <- sim.map.file[[1]] + rnorm(n=length(sim.map.file[[1]]),mean=0,sd=sigma_error) + rownames(sim.map.file[[1]]) <- paste(rownames(sim.map.file[[1]]),rep(1:n_per_marker,times=nrow(sim.map.file[[2]])/n_per_marker),sep="_") + + sim.map.file[[2]] <- sim.map.file[[2]][seq(1,nrow(sim.map.file[[2]]),by=n_per_marker),] + + sim.map.file[[3]] <- strain.marker + + return(sim.map.file) + } + +###Conducts mapping simulations to investigate power +map.1.simulation <- function(strain.map,strain.marker,n_per_marker,sigma_peak,sigma_error,threshold){ + if(missing(strain.map)){ stop("specify strain.map (genetic map)")} + if(missing(strain.marker)){ strain.marker <- data.frame(cbind(name=1:dim(strain.map)[1],chromosome=NA,bp=1:dim(strain.map)[1])) + rownames(strain.marker) <- 1:dim(strain.map)[1]} + if(missing(n_per_marker)){ n_per_marker <-10} + if(missing(sigma_peak)){ sigma_peak <- c(1,1.15,1.3,1.47,1.64,1.82,2,2.2,2.47,2.73,3.05,3.45,4)} + if(missing(sigma_error)){ sigma_error <- 1} + if(missing(threshold)){ threshold <- c(3.5)} + ###1 sigma difference on 1 sigma noise is 20% of variance + ###2 sigma difference on 1 sigma noise is 50% of variance + ###3 sigma difference on 1 sigma noise is 69% of variance + ###4 sigma difference on 1 sigma noise is 80% of variance; summary(lm(c(rnorm(10000,0,1),rnorm(10000,4,1))~rep(c(-1,1),each=10000))) + ###cbind(var_expl=seq(5,80,by=5),sigma=c(0.46,0.67,0.84,1,1.15,1.3,1.47,1.64,1.82,2,2.2,2.47,2.73,3.05,3.45,4)) + + out <- NULL + for(i in sigma_peak){ + ###Generate peaked data with random error + sim.map.file <- simulate.data.per.marker(strain.map,strain.marker,n_per_marker,sigma_peak=i,sigma_error) + sim.emp.file <- simulate.data.per.marker(strain.map,strain.marker,n_per_marker,sigma_peak=0,sigma_error) + + ###Map it + sim.map.file <- map.1(sim.map.file[[1]],sim.map.file[[2]],sim.map.file[[3]]) + + loc_peak <- as.numeric(as.character(unlist(apply(sim.map.file$LOD,1,which.max)))) + lod_peak <- as.numeric(as.character(unlist(apply(sim.map.file$LOD,1,max,na.rm=TRUE)))) + eff_peak <- t(sim.map.file$Effect)[(ncol(sim.map.file$Effect)*(0:(nrow(sim.map.file$Effect)-1)))+loc_peak] + + loc_sim <- rep(1:nrow(sim.map.file$Map),each=n_per_marker) + lod_sim <- t(sim.map.file$LOD)[(ncol(sim.map.file$LOD)*(0:(nrow(sim.map.file$LOD)-1)))+loc_sim] + eff_sim <- t(sim.map.file$Effect)[(ncol(sim.map.file$Effect)*(0:(nrow(sim.map.file$Effect)-1)))+loc_sim] + + out.sim <- NULL + out.sim <- c(out.sim,detected_true=sum(sim.map.file$Marker[loc_peak,2] == sim.map.file$Marker[loc_sim,2] & lod_peak-1.5 < lod_sim & lod_peak >= threshold)) + out.sim <- c(out.sim,detected_false=(sum(lod_peak >= threshold) - sum(sim.map.file$Marker[loc_peak,2] == sim.map.file$Marker[loc_sim,2] & lod_peak-1.5 < lod_sim & lod_peak >= threshold))) + out.sim <- c(out.sim,detected_not = sum(lod_peak < threshold)) + selc <- sim.map.file$Marker[loc_peak,2] == sim.map.file$Marker[loc_sim,2] & lod_peak-1.5 < lod_sim & lod_peak >= threshold + out.sim <- c(out.sim,detected_true_eff_est=quantile(abs(eff_peak[selc])/(i/2))) + out.sim <- c(out.sim,detected_true_loc_est=quantile(abs(sim.map.file$Marker[loc_peak,3]-sim.map.file$Marker[loc_sim,3])[selc])) + + ###Map False file + sim.map.file <- map.1(sim.emp.file[[1]],sim.emp.file[[2]],sim.emp.file[[3]]) + + loc_peak <- as.numeric(as.character(unlist(apply(sim.map.file$LOD,1,which.max)))) + lod_peak <- as.numeric(as.character(unlist(apply(sim.map.file$LOD,1,max,na.rm=TRUE)))) + eff_peak <- t(sim.map.file$Effect)[(ncol(sim.map.file$Effect)*(0:(nrow(sim.map.file$Effect)-1)))+loc_peak] + + loc_sim <- rep(1:nrow(sim.map.file$Map),each=n_per_marker) + lod_sim <- t(sim.map.file$LOD)[(ncol(sim.map.file$LOD)*(0:(nrow(sim.map.file$LOD)-1)))+loc_sim] + eff_sim <- t(sim.map.file$Effect)[(ncol(sim.map.file$Effect)*(0:(nrow(sim.map.file$Effect)-1)))+loc_sim] + + out.emp <- NULL + out.emp <- c(out.emp,detected_true=0) + out.emp <- c(out.emp,detected_false=sum(lod_peak >= threshold)) + out.emp <- c(out.emp,detected_not = sum(lod_peak < threshold)) + + out.emp <- c(out.emp,detected_true_eff_est=quantile(rep(NA,times=100),na.rm=T)) + out.emp <- c(out.emp,detected_true_loc_est=quantile(rep(NA,times=100),na.rm=T)) + ###Combine and summarize + out.sim <- data.frame(cbind(name=c(paste("peak_sim_",i,sep=""),paste("noise_sim_",sigma_error,sep="")), + var_explained=c(round(summary(lm(c(rnorm(100000,0,sigma_error),rnorm(100000,i,sigma_error))~rep(c(-1,1),each=100000)))$adj.r.squared,digits=2),round(summary(lm(c(rnorm(100000,0,sigma_error),rnorm(100000,0,sigma_error))~rep(c(-1,1),each=100000)))$adj.r.squared,digits=2)), + rbind(out.sim,out.emp))) + + out <- rbind(out,out.sim) + } + + ###Make sure numbers are numeric and characters are characteristic + for(i in 2:ncol(out)){ + out[,i] <- as.numeric(as.character(unlist(out[,i]))) + } + out[,1] <- as.character(unlist(out[,1])) + + + ###summarize all noise simulations + out[max(grep("noise_sim",out[,1])),-1] <- apply(out[grep("noise_sim",out[,1]),-1],2,sum) + out <- out[c(grep("peak_sim",out[,1]),max(grep("noise_sim",out[,1]))),] + + ##Add percentage detected & percentage false + output <- cbind(out, + percentage_detected=round(out$detected_true/(out$detected_true+out$detected_false+out$detected_not),digits=3)*100, + percentage_false=round(out$detected_false/(out$detected_true+out$detected_false+out$detected_not),digits=3)*100, + type=unlist(strsplit(out$name,split="_"))[seq(1,nrow(out)*3,by=3)], + sigma=unlist(strsplit(out$name,split="_"))[seq(3,nrow(out)*3,by=3)]) + + output <- output[,c(18,19,2,3:5,16,17,6:15)] + colnames(output)[grepl("_est.",colnames(output))] <- paste("quantile_",unlist(strsplit(colnames(output)[grepl("_est.",colnames(output))],split="_est."))[seq(2,2*sum(grepl("_est.",colnames(output))),by=2)],sep="") + colnames(output) <- gsub("\\.","",colnames(output)) + + colnames(output) <- paste(c(rep("Simulation",times=3),rep("QTL_detection",times=5),rep("Effect_size_estimation_(quantiles)",times=5),rep("QTL-true_peak_location_distance_(quantiles)",times=5)),colnames(output),sep=".") + + + return(output) + } + diff --git a/functions/eQTL_based_analyses.R b/functions/eQTL_based_analyses.R new file mode 100644 index 0000000..60272fb --- /dev/null +++ b/functions/eQTL_based_analyses.R @@ -0,0 +1,126 @@ + + + ################################################################################################################################################################ + ###eQTL_correlator(trait.dataframe,strain.map,strain.marker,eQTL.table,ref.genotype,method,n.genes,interval) + ### predicts the genotype based on eQTL information, it returns a list with correlations. + ### obligatory input + ### eQTL.table.output, output from the eQTL.table function + ### trait.dataframe, preferrably data centered to the parental mean, otherwise z.scores or ratio with the mean of the entire experiment could be used + ### optional input + ### ref.genotype, standard is "1" + ### n.genes, standard is 20 + ### method, standard is "per.genes", other option is "interval" + ### interval, standard is 1e6 + + eQTL.correlator <- function(trait.dataframe,eQTL.table,ref.genotype,method,n.genes,interval){ + if(missing(trait.dataframe)){ stop("specify trait.dataframe, columns: trait, strain, ratio, sample")} + if(sum(c("trait","strain","ratio","sample") %in% colnames(trait.dataframe)) != 4){ + stop("specify trait.dataframe, columns: trait, strain, ratio, sample")} + if(missing(eQTL.table)){ stop("specify eQTL.table")} + ###Parameters + if(missing(ref.genotype)){ ref.genotype <- 1} + if(missing(method)){ method <- "per.genes"} + if(missing(n.genes) & method=="per.genes"){ n.genes <- 20} + if(missing(interval)& method=="interval"){ interval <- 1e6} + if(method %in% c("per.genes","interval") ==FALSE){ stop("choose a method from 'per.genes' and 'interval'")} + + + ###Take only one type of eQTL + eQTL.table <- eQTL.table[eQTL.table$qtl_type == unique(eQTL.table$qtl_type)[1], + colnames(eQTL.table) %in% c("trait","qtl_chromosome","qtl_bp","qtl_marker","qtl_significance","qtl_effect","gene_chromosome","gene_bp","gene_WBID")] + eQTL.table <- merge(eQTL.table,trait.dataframe) + + + ###calculate correlation per n.genes, 20 works pretty well + if(method =="per.genes"){ + out <- group_by(eQTL.table,strain,gene_chromosome,sample) %>% + mutate(order_gene=ceiling(rank(gene_bp)/n.genes)) %>% + as.data.frame() %>% + group_by(strain,gene_chromosome,order_gene,sample) %>% + mutate(n.cor=length(qtl_effect)) %>% + as.data.frame() %>% + mutate(order_gene = ifelse(n.cor<n.genes/2,order_gene-1,order_gene)) %>% + group_by(strain,gene_chromosome,order_gene,sample) %>% + summarise(correlation = cor(x=ratio,y=qtl_effect*ref.genotype,use="complete.obs"), + correlation_significance=cor.test(x=ratio,y=qtl_effect*ref.genotype,na.action=na.omit())$p.value,n.cor=length(qtl_effect), + location_left=min(gene_bp)/1e6,location_median=median(gene_bp)/1e6,location_right=max(gene_bp)/1e6) %>% + as.data.frame() + } + + + if(method =="interval"){ + out <- mutate(eQTL.table.output,order_gene=ceiling(gene_bp/interval)) %>% + group_by(strain,gene_chromosome,order_gene,sample) %>% + mutate(n.cor=length(qtl_effect)) %>% + as.data.frame() %>% + mutate(order_gene = ifelse(n.cor<10,order_gene-1,order_gene)) %>% + group_by(strain,gene_chromosome,order_gene,sample) %>% + mutate(n.cor=length(qtl_effect)) %>% + as.data.frame() %>% + mutate(order_gene = ifelse(n.cor<10,order_gene-1,order_gene)) %>% + group_by(strain,gene_chromosome,order_gene,sample) %>% + summarise(correlation = cor(x=ratio,y=qtl_effect*ref.genotype,use="complete.obs"), + correlation_significance=cor.test(x=ratio,y=qtl_effect*ref.genotype,na.action=na.omit())$p.value,n.cor=length(qtl_effect), + location_left=min(gene_bp)/1e6,location_median=median(gene_bp)/1e6,location_right=max(gene_bp)/1e6) %>% + as.data.frame() + } + + return(out) + } + + + ### strain.map, map of the investigated strains + ### strain.marker, marker file of the investigated strains + + plot.eQTL.correlation <- function(eQTL.correlator.output,strain.map,strain.marker,filename){ + if(missing(eQTL.correlator.output)){ stop("provide output from eQTL.correlator function")} + if(missing(strain.map)){ stop("specify strain.map (genetic map)")} + if(missing(strain.marker)){ strain.marker <- data.frame(cbind(name=1:dim(strain.map)[1],chromosome=NA,bp=1:dim(strain.map)[1])) + rownames(strain.marker) <- 1:dim(strain.map)[1]} + if(missing(filename )){ filename <- "eQTL_genotype"} + + pdf(paste(filename,".pdf",sep="_"),width=18,height=8) + for(i in 1:length(unique(eQTL.correlator.output$sample))){ + plot.data <- eQTL.correlator.output[eQTL.correlator.output$sample==unique(eQTL.correlator.output$sample)[i],] + + plot.eQTL <- ggplot(plot.data,aes(x=location_median,y=correlation,colour=n.cor)) + + geom_point(aes(alpha=0.5)) + geom_smooth(method="loess",span=0.5,se=TRUE) + geom_segment(aes(x=location_left,y=correlation,xend=location_right,yend=correlation,alpha=0.5)) + + facet_grid(.~gene_chromosome, scales="free") + geom_hline(yintercept=0) + presentation + + labs(x="Marker location (Mb)",y=paste("Correlation N2 and",tolower(unlist(unique(plot.data$strain))[1]))) + ylim(-2,2) + + ggtitle(unique(plot.data$sample)) + + plot.data <- data.frame(cbind(name=strain.marker[,1], + chromosome=strain.marker[,2], + bp=strain.marker[,3], + current=strain.map[,tolower(colnames(strain.map))==tolower(unlist(unique(plot.data$strain))[1])])) + plot.data[,1] <- as.character(unlist(plot.data[,1])) + plot.data[,2] <- as.character(unlist(plot.data[,2])) + plot.data[,3] <- as.numeric(as.character(unlist(plot.data[,3]))) + plot.data[,4] <- as.numeric(as.character(unlist(plot.data[,4]))) + + plot.mapz <- ggplot(plot.data,aes(x=bp/1e6,y=current,colour=current)) + + geom_point() + geom_line() + facet_grid(.~chromosome, scales="free") + + geom_hline(yintercept=0) + presentation + + labs(x="Marker location (Mb)",y=paste("Map",tolower(unlist(unique(plot.data$strain))[1]))) + ylim(-2,2) + + grid.arrange(plot.eQTL , plot.mapz, + ncol=1,heights=c(4,2)) + + } + dev.off() + } + + + + + + + + + + + + + + + diff --git a/functions/emma_functions.r b/functions/emma_functions.r new file mode 100644 index 0000000..3cfc309 --- /dev/null +++ b/functions/emma_functions.r @@ -0,0 +1,515 @@ +emma.REML.t <- function (ys, xs, K, Z = NULL, X0 = NULL, ngrids = 100, llim = -10,ulim = 10, esp = 1e-10, ponly = FALSE){ + if (is.null(dim(ys)) || ncol(ys) == 1) { + ys <- matrix(ys, 1, length(ys)) + } + if (is.null(dim(xs)) || ncol(xs) == 1) { + xs <- matrix(xs, 1, length(xs)) + } + if (is.null(X0)) { + X0 <- matrix(1, ncol(ys), 1) + } + g <- nrow(ys) + n <- ncol(ys) + m <- nrow(xs) + t <- ncol(xs) + q0 <- ncol(X0) + q1 <- q0 + 1 + stopifnot(nrow(K) == t) + stopifnot(ncol(K) == t) + stopifnot(nrow(X0) == n) + if (!ponly) { + REMLs <- matrix(nrow = m, ncol = g) + vgs <- matrix(nrow = m, ncol = g) + ves <- matrix(nrow = m, ncol = g) + } + dfs <- matrix(nrow = m, ncol = g) + stats <- matrix(nrow = m, ncol = g) + ps <- matrix(nrow = m, ncol = g) + if (sum(is.na(ys)) == 0) { + eig.L <- emma.eigen.L(Z, K) + x.prev <- vector(length = 0) + for (i in 1:m) { + vids <- !is.na(xs[i, ]) + nv <- sum(vids) + xv <- xs[i, vids] + if ((mean(xv) <= 0) || (mean(xv) >= 1)) { + if (!ponly) { + vgs[i, ] <- rep(NA, g) + ves[i, ] <- rep(NA, g) + dfs[i, ] <- rep(NA, g) + REMLs[i, ] <- rep(NA, g) + stats[i, ] <- rep(NA, g) + } + ps[i, ] = rep(1, g) + } + else if (identical(x.prev, xv)) { + if (!ponly) { + vgs[i, ] <- vgs[i - 1, ] + ves[i, ] <- ves[i - 1, ] + dfs[i, ] <- dfs[i - 1, ] + REMLs[i, ] <- REMLs[i - 1, ] + stats[i, ] <- stats[i - 1, ] + } + ps[i, ] <- ps[i - 1, ] + } + else { + if (is.null(Z)) { + X <- cbind(X0[vids, , drop = FALSE], xs[i, + vids]) + eig.R1 = emma.eigen.R.wo.Z(K[vids, vids], X) + } + else { + vrows <- as.logical(rowSums(Z[, vids])) + X <- cbind(X0[vrows, , drop = FALSE], Z[vrows, + vids, drop = FALSE] %*% t(xs[i, vids, drop = FALSE])) + eig.R1 = emma.eigen.R.w.Z(Z[vrows, vids], K[vids, + vids], X) + } + for (j in 1:g) { + if (nv == t) { + REMLE <- emma.REMLE(ys[j, ], X, K, Z, ngrids, + llim, ulim, esp, eig.R1) + if (is.null(Z)) { + U <- eig.L$vectors * matrix(sqrt(1/(eig.L$values + + REMLE$delta)), t, t, byrow = TRUE) + dfs[i, j] <- nv - q1 + } + else { + U <- eig.L$vectors * matrix(c(sqrt(1/(eig.L$values + + REMLE$delta)), rep(sqrt(1/REMLE$delta), + n - t)), n, n, byrow = TRUE) + dfs[i, j] <- n - q1 + } + yt <- crossprod(U, ys[j, ]) + Xt <- crossprod(U, X) + iXX <- solve(crossprod(Xt, Xt)) + beta <- iXX %*% crossprod(Xt, yt) + if (!ponly) { + vgs[i, j] <- REMLE$vg + ves[i, j] <- REMLE$ve + REMLs[i, j] <- REMLE$REML + } + stats[i, j] <- beta[q1]/sqrt(iXX[q1, q1] * + REMLE$vg) + } + else { + if (is.null(Z)) { + eig.L0 <- emma.eigen.L.wo.Z(K[vids, vids]) + nr <- sum(vids) + yv <- ys[j, vids] + REMLE <- emma.REMLE(yv, X, K[vids, vids, + drop = FALSE], NULL, ngrids, llim, ulim, + esp, eig.R1) + U <- eig.L0$vectors * matrix(sqrt(1/(eig.L0$values + + REMLE$delta)), nr, nr, byrow = TRUE) + dfs[i, j] <- nr - q1 + } + else { + eig.L0 <- emma.eigen.L.w.Z(Z[vrows, vids, + drop = FALSE], K[vids, vids]) + yv <- ys[j, vrows] + nr <- sum(vrows) + tv <- sum(vids) + REMLE <- emma.REMLE(yv, X, K[vids, vids, + drop = FALSE], Z[vrows, vids, drop = FALSE], + ngrids, llim, ulim, esp, eig.R1) + U <- eig.L0$vectors * matrix(c(sqrt(1/(eig.L0$values + + REMLE$delta)), rep(sqrt(1/REMLE$delta), + nr - tv)), nr, nr, byrow = TRUE) + dfs[i, j] <- nr - q1 + } + yt <- crossprod(U, yv) + Xt <- crossprod(U, X) + iXX <- solve(crossprod(Xt, Xt)) + beta <- iXX %*% crossprod(Xt, yt) + if (!ponly) { + vgs[i, j] <- REMLE$vg + ves[i, j] <- REMLE$ve + REMLs[i, j] <- REMLE$REML + } + stats[i, j] <- beta[q1]/sqrt(iXX[q1, q1] * + REMLE$vg) + } + } + ps[i, ] <- 2 * pt(abs(stats[i, ]), dfs[i, ], + lower.tail = FALSE) + } + } + } + else { + eig.L <- emma.eigen.L(Z, K) + eig.R0 <- emma.eigen.R(Z, K, X0) + x.prev <- vector(length = 0) + for (i in 1:m) { + vids <- !is.na(xs[i, ]) + nv <- sum(vids) + xv <- xs[i, vids] + if ((mean(xv) <= 0) || (mean(xv) >= 1)) { + if (!ponly) { + vgs[i, ] <- rep(NA, g) + ves[i, ] <- rep(NA, g) + REMLs[i, ] <- rep(NA, g) + dfs[i, ] <- rep(NA, g) + } + ps[i, ] = rep(1, g) + } + else if (identical(x.prev, xv)) { + if (!ponly) { + stats[i, ] <- stats[i - 1, ] + vgs[i, ] <- vgs[i - 1, ] + ves[i, ] <- ves[i - 1, ] + REMLs[i, ] <- REMLs[i - 1, ] + dfs[i, ] <- dfs[i - 1, ] + } + ps[i, ] = ps[i - 1, ] + } + else { + if (is.null(Z)) { + X <- cbind(X0, xs[i, ]) + if (nv == t) { + eig.R1 = emma.eigen.R.wo.Z(K, X) + } + } + else { + vrows <- as.logical(rowSums(Z[, vids, drop = FALSE])) + X <- cbind(X0, Z[, vids, drop = FALSE] %*% + t(xs[i, vids, drop = FALSE])) + if (nv == t) { + eig.R1 = emma.eigen.R.w.Z(Z, K, X) + } + } + for (j in 1:g) { + vrows <- !is.na(ys[j, ]) + if (nv == t) { + yv <- ys[j, vrows] + nr <- sum(vrows) + if (is.null(Z)) { + if (nr == n) { + REMLE <- emma.REMLE(yv, X, K, NULL, ngrids, + llim, ulim, esp, eig.R1) + U <- eig.L$vectors * matrix(sqrt(1/(eig.L$values + + REMLE$delta)), n, n, byrow = TRUE) + } + else { + eig.L0 <- emma.eigen.L.wo.Z(K[vrows, + vrows, drop = FALSE]) + REMLE <- emma.REMLE(yv, X[vrows, , drop = FALSE], + K[vrows, vrows, drop = FALSE], NULL, + ngrids, llim, ulim, esp) + U <- eig.L0$vectors * matrix(sqrt(1/(eig.L0$values + + REMLE$delta)), nr, nr, byrow = TRUE) + } + dfs[i, j] <- nr - q1 + } + else { + if (nr == n) { + REMLE <- emma.REMLE(yv, X, K, Z, ngrids, + llim, ulim, esp, eig.R1) + U <- eig.L$vectors * matrix(c(sqrt(1/(eig.L$values + + REMLE$delta)), rep(sqrt(1/REMLE$delta), + n - t)), n, n, byrow = TRUE) + } + else { + vtids <- as.logical(colSums(Z[vrows, + , drop = FALSE])) + eig.L0 <- emma.eigen.L.w.Z(Z[vrows, vtids, + drop = FALSE], K[vtids, vtids, drop = FALSE]) + REMLE <- emma.REMLE(yv, X[vrows, , drop = FALSE], + K[vtids, vtids, drop = FALSE], Z[vrows, + vtids, drop = FALSE], ngrids, llim, + ulim, esp) + U <- eig.L0$vectors * matrix(c(sqrt(1/(eig.L0$values + + REMLE$delta)), rep(sqrt(1/REMLE$delta), + nr - sum(vtids))), nr, nr, byrow = TRUE) + } + dfs[i, j] <- nr - q1 + } + yt <- crossprod(U, yv) + Xt <- crossprod(U, X[vrows, , drop = FALSE]) + iXX <- solve(crossprod(Xt, Xt)) + beta <- iXX %*% crossprod(Xt, yt) + if (!ponly) { + vgs[i, j] <- REMLE$vg + ves[i, j] <- REMLE$ve + REMLs[i, j] <- REMLE$REML + } + stats[i, j] <- beta[q1]/sqrt(iXX[q1, q1] * + REMLE$vg) + } + else { + if (is.null(Z)) { + vtids <- vrows & vids + eig.L0 <- emma.eigen.L.wo.Z(K[vtids, vtids, + drop = FALSE]) + yv <- ys[j, vtids] + nr <- sum(vtids) + REMLE <- emma.REMLE(yv, X[vtids, , drop = FALSE], + K[vtids, vtids, drop = FALSE], NULL, + ngrids, llim, ulim, esp) + U <- eig.L0$vectors * matrix(sqrt(1/(eig.L0$values + + REMLE$delta)), nr, nr, byrow = TRUE) + Xt <- crossprod(U, X[vtids, , drop = FALSE]) + dfs[i, j] <- nr - q1 + } + else { + vtids <- as.logical(colSums(Z[vrows, , + drop = FALSE])) & vids + vtrows <- vrows & as.logical(rowSums(Z[, + vids, drop = FALSE])) + eig.L0 <- emma.eigen.L.w.Z(Z[vtrows, vtids, + drop = FALSE], K[vtids, vtids, drop = FALSE]) + yv <- ys[j, vtrows] + nr <- sum(vtrows) + REMLE <- emma.REMLE(yv, X[vtrows, , drop = FALSE], + K[vtids, vtids, drop = FALSE], Z[vtrows, + vtids, drop = FALSE], ngrids, llim, + ulim, esp) + U <- eig.L0$vectors * matrix(c(sqrt(1/(eig.L0$values + + REMLE$delta)), rep(sqrt(1/REMLE$delta), + nr - sum(vtids))), nr, nr, byrow = TRUE) + Xt <- crossprod(U, X[vtrows, , drop = FALSE]) + dfs[i, j] <- nr - q1 + } + yt <- crossprod(U, yv) + iXX <- solve(crossprod(Xt, Xt)) + beta <- iXX %*% crossprod(Xt, yt) + if (!ponly) { + vgs[i, j] <- REMLE$vg + ves[i, j] <- REMLE$ve + REMLs[i, j] <- REMLE$REML + } + stats[i, j] <- beta[q1]/sqrt(iXX[q1, q1] * + REMLE$vg) + } + } + ps[i, ] <- 2 * pt(abs(stats[i, ]), dfs[i, ], + lower.tail = FALSE) + } + } + } + if (ponly) { + return(ps) + } + else { + return(list(ps = ps, REMLs = REMLs, stats = stats, dfs = dfs, + vgs = vgs, ves = ves)) + } +} + +emma.kinship <- function (snps, method = "additive", use = "all") +{ + n0 <- sum(snps == 0, na.rm = TRUE) + nh <- sum(snps == 0.5, na.rm = TRUE) + n1 <- sum(snps == 1, na.rm = TRUE) + nNA <- sum(is.na(snps)) + stopifnot(n0 + nh + n1 + nNA == length(snps)) + if (method == "dominant") { + flags <- matrix(as.double(rowMeans(snps, na.rm = TRUE) > + 0.5), nrow(snps), ncol(snps)) + snps[!is.na(snps) & (snps == 0.5)] <- flags[!is.na(snps) & + (snps == 0.5)] + } + else if (method == "recessive") { + flags <- matrix(as.double(rowMeans(snps, na.rm = TRUE) < + 0.5), nrow(snps), ncol(snps)) + snps[!is.na(snps) & (snps == 0.5)] <- flags[!is.na(snps) & + (snps == 0.5)] + } + else if ((method == "additive") && (nh > 0)) { + dsnps <- snps + rsnps <- snps + flags <- matrix(as.double(rowMeans(snps, na.rm = TRUE) > + 0.5), nrow(snps), ncol(snps)) + dsnps[!is.na(snps) & (snps == 0.5)] <- flags[!is.na(snps) & + (snps == 0.5)] + flags <- matrix(as.double(rowMeans(snps, na.rm = TRUE) < + 0.5), nrow(snps), ncol(snps)) + rsnps[!is.na(snps) & (snps == 0.5)] <- flags[!is.na(snps) & + (snps == 0.5)] + snps <- rbind(dsnps, rsnps) + } + if (use == "all") { + mafs <- matrix(rowMeans(snps, na.rm = TRUE), nrow(snps), + ncol(snps)) + snps[is.na(snps)] <- mafs[is.na(snps)] + } + else if (use == "complete.obs") { + snps <- snps[rowSums(is.na(snps)) == 0, ] + } + n <- ncol(snps) + K <- matrix(nrow = n, ncol = n) + diag(K) <- 1 + for (i in 2:n) { + for (j in 1:(i - 1)) { + x <- snps[, i] * snps[, j] + (1 - snps[, i]) * (1 - + snps[, j]) + K[i, j] <- sum(x, na.rm = TRUE)/sum(!is.na(x)) + K[j, i] <- K[i, j] + } + } + return(K) +} + + +emma.eigen.L <- function (Z, K, complete = TRUE) +{ + if (is.null(Z)) { + return(emma.eigen.L.wo.Z(K)) + } + else { + return(emma.eigen.L.w.Z(Z, K, complete)) + } +} + +emma.eigen.L.wo.Z <- function (K) +{ + eig <- eigen(K, symmetric = TRUE) + return(list(values = eig$values, vectors = eig$vectors)) +} + + +emma.eigen.R.wo.Z <- function (K, X) +{ + n <- nrow(X) + q <- ncol(X) + S <- diag(n) - X %*% solve(crossprod(X, X)) %*% t(X) + eig <- eigen(S %*% (K + diag(1, n)) %*% S, symmetric = TRUE) + stopifnot(!is.complex(eig$values)) + return(list(values = eig$values[1:(n - q)] - 1, vectors = eig$vectors[, + 1:(n - q)])) +} + +emma.eigen.R <- function(Z,K,X,complete=TRUE) { + if ( is.null(Z) ) { + return(emma.eigen.R.wo.Z(K,X)) + } + else { + return(emma.eigen.R.w.Z(Z,K,X,complete)) + } +} + +emma.REMLE <- function (y, X, K, Z = NULL, ngrids = 100, llim = -10, ulim = 10, + esp = 1e-10, eig.L = NULL, eig.R = NULL) +{ + n <- length(y) + t <- nrow(K) + q <- ncol(X) + stopifnot(ncol(K) == t) + stopifnot(nrow(X) == n) + if (det(crossprod(X, X)) == 0) { + warning("X is singular") + return(list(REML = 0, delta = 0, ve = 0, vg = 0)) + } + if (is.null(Z)) { + if (is.null(eig.R)) { + eig.R <- emma.eigen.R.wo.Z(K, X) + } + etas <- crossprod(eig.R$vectors, y) + logdelta <- (0:ngrids)/ngrids * (ulim - llim) + llim + m <- length(logdelta) + delta <- exp(logdelta) + Lambdas <- matrix(eig.R$values, n - q, m) + matrix(delta, + n - q, m, byrow = TRUE) + Etasq <- matrix(etas * etas, n - q, m) + LL <- 0.5 * ((n - q) * (log((n - q)/(2 * pi)) - 1 - log(colSums(Etasq/Lambdas))) - + colSums(log(Lambdas))) + dLL <- 0.5 * delta * ((n - q) * colSums(Etasq/(Lambdas * + Lambdas))/colSums(Etasq/Lambdas) - colSums(1/Lambdas)) + optlogdelta <- vector(length = 0) + optLL <- vector(length = 0) + if (dLL[1] < esp) { + optlogdelta <- append(optlogdelta, llim) + optLL <- append(optLL, emma.delta.REML.LL.wo.Z(llim, + eig.R$values, etas)) + } + if (dLL[m - 1] > 0 - esp) { + optlogdelta <- append(optlogdelta, ulim) + optLL <- append(optLL, emma.delta.REML.LL.wo.Z(ulim, + eig.R$values, etas)) + } + for (i in 1:(m - 1)) { + if ((dLL[i] * dLL[i + 1] < 0 - esp * esp) && (dLL[i] > + 0) && (dLL[i + 1] < 0)) { + r <- uniroot(emma.delta.REML.dLL.wo.Z, lower = logdelta[i], + upper = logdelta[i + 1], lambda = eig.R$values, + etas = etas) + optlogdelta <- append(optlogdelta, r$root) + optLL <- append(optLL, emma.delta.REML.LL.wo.Z(r$root, + eig.R$values, etas)) + } + } + } + else { + if (is.null(eig.R)) { + eig.R <- emma.eigen.R.w.Z(Z, K, X) + } + etas <- crossprod(eig.R$vectors, y) + etas.1 <- etas[1:(t - q)] + etas.2 <- etas[(t - q + 1):(n - q)] + etas.2.sq <- sum(etas.2 * etas.2) + logdelta <- (0:ngrids)/ngrids * (ulim - llim) + llim + m <- length(logdelta) + delta <- exp(logdelta) + Lambdas <- matrix(eig.R$values, t - q, m) + matrix(delta, + t - q, m, byrow = TRUE) + Etasq <- matrix(etas.1 * etas.1, t - q, m) + dLL <- 0.5 * delta * ((n - q) * (colSums(Etasq/(Lambdas * + Lambdas)) + etas.2.sq/(delta * delta))/(colSums(Etasq/Lambdas) + + etas.2.sq/delta) - (colSums(1/Lambdas) + (n - t)/delta)) + optlogdelta <- vector(length = 0) + optLL <- vector(length = 0) + if (dLL[1] < esp) { + optlogdelta <- append(optlogdelta, llim) + optLL <- append(optLL, emma.delta.REML.LL.w.Z(llim, + eig.R$values, etas.1, n, t, etas.2.sq)) + } + if (dLL[m - 1] > 0 - esp) { + optlogdelta <- append(optlogdelta, ulim) + optLL <- append(optLL, emma.delta.REML.LL.w.Z(ulim, + eig.R$values, etas.1, n, t, etas.2.sq)) + } + for (i in 1:(m - 1)) { + if ((dLL[i] * dLL[i + 1] < 0 - esp * esp) && (dLL[i] > + 0) && (dLL[i + 1] < 0)) { + r <- uniroot(emma.delta.REML.dLL.w.Z, lower = logdelta[i], + upper = logdelta[i + 1], lambda = eig.R$values, + etas.1 = etas.1, n = n, t1 = t, etas.2.sq = etas.2.sq) + optlogdelta <- append(optlogdelta, r$root) + optLL <- append(optLL, emma.delta.REML.LL.w.Z(r$root, + eig.R$values, etas.1, n, t, etas.2.sq)) + } + } + } + maxdelta <- exp(optlogdelta[which.max(optLL)]) + maxLL <- max(optLL) + if (is.null(Z)) { + maxva <- sum(etas * etas/(eig.R$values + maxdelta))/(n - + q) + } + else { + maxva <- (sum(etas.1 * etas.1/(eig.R$values + maxdelta)) + + etas.2.sq/maxdelta)/(n - q) + } + maxve <- maxva * maxdelta + return(list(REML = maxLL, delta = maxdelta, ve = maxve, vg = maxva)) +} + + +emma.delta.REML.LL.wo.Z <- function (logdelta, lambda, etas) +{ + nq <- length(etas) + delta <- exp(logdelta) + return(0.5 * (nq * (log(nq/(2 * pi)) - 1 - log(sum(etas * + etas/(lambda + delta)))) - sum(log(lambda + delta)))) +} + + +emma.delta.REML.dLL.wo.Z <- function (logdelta, lambda, etas) +{ + nq <- length(etas) + delta <- exp(logdelta) + etasq <- etas * etas + ldelta <- lambda + delta + return(0.5 * (nq * sum(etasq/(ldelta * ldelta))/sum(etasq/ldelta) - + sum(1/ldelta))) +} \ No newline at end of file -- GitLab