From adce632abdac44397e425c76173666404fd80d60 Mon Sep 17 00:00:00 2001
From: "Hartanto, Margi" <margi.hartanto@wur.nl>
Date: Tue, 28 Apr 2020 13:51:22 +0200
Subject: [PATCH] add functions folder and fix horizontal threshold on eQTL
 histogram

---
 .gitignore                             |   1 -
 3-eqtl-visualization.R                 |   8 +-
 4-qtl-integration.R                    |   1 -
 functions/Auxiliary_functions.R        | 102 +++++
 functions/Data_preparation.R           | 113 ++++++
 functions/Heritabilities.R             | 312 +++++++++++++++
 functions/QTL_mapping.R                | 448 +++++++++++++++++++++
 functions/QTL_mapping_more.R           |  32 ++
 functions/QTL_simulation_permutation.R | 256 ++++++++++++
 functions/eQTL_based_analyses.R        | 126 ++++++
 functions/emma_functions.r             | 515 +++++++++++++++++++++++++
 11 files changed, 1906 insertions(+), 8 deletions(-)
 create mode 100644 functions/Auxiliary_functions.R
 create mode 100644 functions/Data_preparation.R
 create mode 100644 functions/Heritabilities.R
 create mode 100644 functions/QTL_mapping.R
 create mode 100644 functions/QTL_mapping_more.R
 create mode 100644 functions/QTL_simulation_permutation.R
 create mode 100644 functions/eQTL_based_analyses.R
 create mode 100644 functions/emma_functions.r

diff --git a/.gitignore b/.gitignore
index 16132e0..1b0c992 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,7 +4,6 @@
 .Ruserdata
 figures
 files
-functions
 networks
 qtl-peaks
 qtl-permutations
diff --git a/3-eqtl-visualization.R b/3-eqtl-visualization.R
index cf86a1d..cf64978 100644
--- a/3-eqtl-visualization.R
+++ b/3-eqtl-visualization.R
@@ -20,7 +20,6 @@
   library(UpSetR)
   library(forcats)
   library(grid)
-  
   library(heritability)
   library(topGO)
   library(org.At.tair.db)
@@ -33,9 +32,6 @@
   # library(factoextra)
   # library(stats)
   # library(amap)
-  # 
-  # 
-  # 
   # library(gridExtra)
   # library(RCy3)
   # library(corrplot)
@@ -344,8 +340,8 @@
       ylim(c(0, 100)) +
       geom_hline(data = hist.threshold, aes(yintercept = threshold), 
                  linetype = 'dashed', 
-                 color = 'red', 
-                 size = .1) +
+                 color = 'black', 
+                 size = .5) +
       scale_x_continuous(breaks=c(5, 10, 15, 20, 25, 30, 35, 40)*10^6,labels=c(5, 10, 15, 20, 25, 30, 35, 40))
     
     tiff(file = paste0("figures/eqtl-hist.tiff"), 
diff --git a/4-qtl-integration.R b/4-qtl-integration.R
index 8a906c7..06ed303 100644
--- a/4-qtl-integration.R
+++ b/4-qtl-integration.R
@@ -92,7 +92,6 @@
     geom_histogram(binwidth = 2000000, right = T, origin = 0, alpha = 1) +
     #geom_density(alpha = 0.5) +
     facet_grid(factor(qtl_level, level = c('phQTL', 'mQTL', 'eQTL')) ~ qtl_chromosome, scales="free") +
-    
     presentation +
     scale_fill_manual(values = c('#000000', '#ccbb44', '#228833', '#4477aa', '#cc3311')) +
     theme(legend.position = "top") +
diff --git a/functions/Auxiliary_functions.R b/functions/Auxiliary_functions.R
new file mode 100644
index 0000000..c1ac574
--- /dev/null
+++ b/functions/Auxiliary_functions.R
@@ -0,0 +1,102 @@
+###Linear models
+    ###Fast summary function
+    ###This function cannot handle NA's!
+    fast.summ <- function(input){
+                          p <- input$rank
+                          rdf <- input$df.residual
+                          Qr <- input$qr
+                          n <- nrow(Qr$qr)
+
+                          p1 <- 1L:p
+                          r <- input$residuals
+                          rss <- colSums(r^2)
+
+                          resvar <- rss/rdf
+                          R <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
+                          se <- sqrt(rep(diag(R),each=length(resvar)) * rep(resvar,times=length(diag(R))))
+                          est <- input$coefficients[Qr$pivot[p1],]
+                          tval <- t(t(est)/se)
+
+                          pvals <- round(-log10(2 * pt(abs(tval),rdf, lower.tail = FALSE)),digits=2)
+                          output <- cbind(t(pvals)[,-1],t(round(est,digits=5))[,-1])
+                          return(output)
+                         }
+
+
+    ###Fast linear model, 1 variable
+    ###Uses fast.summ, can't handle NA's!
+    fast.lm.1 <- function(traits,variable){
+                          if(missing(traits)){        stop("specify traits")}
+                          if(missing(variable)){      stop("specify variable")}
+
+                          output <- fast.summ(lm(t(traits)~variable))
+                          return(output)
+                         }
+
+    
+    ###Fast linear model, 2 variables, additive
+    fast.lm.2.add <- function(variable1,traits,variable2){
+      if(missing(traits)){        stop("specify traits")}
+      if(missing(variable1)){     stop("specify variable1")}
+      if(missing(variable2)){     stop("specify variable2")}
+      
+      output <- fast.summ(lm(t(traits)~variable1+variable2))
+      if(dim(output)[2]==2){
+          output <- cbind(output[,1],NA,output[,2],NA)
+      }
+      if(dim(output)[2]==4){
+          colnames(output) <- c("pval1","pval2","eff1","eff2") 
+          return(output)
+      }     
+    }
+    
+    
+    ###Fast linear model, 2 variables, interaction
+    fast.lm.2.int <- function(variable1,traits,variable2){
+      if(missing(traits)){        stop("specify traits")}
+      if(missing(variable1)){     stop("specify variable1")}
+      if(missing(variable2)){     stop("specify variable2")}
+      
+      output <- fast.summ(lm(t(traits)~variable1*variable2))
+      if(dim(output)[2]==2){
+          output <- cbind(output[,1],NA,NA,output[,2],NA,NA)
+      }
+      if(dim(output)[2]==4){
+          output <- cbind(output[,1:2],NA,output[,3:4],NA)
+      }
+      if(dim(output)[2]==6){
+          colnames(output) <- c("pval1","pval2","pvali","eff1","eff2","effi") 
+          return(output)
+      }          
+    }
+
+    ###Slow linear model for NA handling
+    lm.1 <- function(data.input,variable){
+                     lm.tapply <- function(y,x){return(c(diff(tapply(y[x!=0 & !is.na(x)],x[x!=0 & !is.na(x)],mean,na.rm=T)),cor.test(y[x!=0 & !is.na(x)],x[x!=0 & !is.na(x)],na.action=na.exclude())$p.value))}
+
+                     out <- tapply(as.numeric(t(data.input)),rep(rownames(data.input),each=ncol(data.input)),lm.tapply,x=variable)
+                     out <- unlist(out)
+                     out <- cbind(LOD=-log10(out[seq(2,length(out),by=2)]),effect=out[seq(1,length(out),by=2)])
+
+                     return(out)
+                    }
+
+
+
+    ###Permutation function
+    permutate.traits <- function(trait.matrix){
+                                 perm.list <- rep(c(1:nrow(trait.matrix)),times=ncol(trait.matrix)) + runif(length(trait.matrix),0.25,0.75)
+                                 perm.list <- order(perm.list)
+                                 perm.data <- matrix(as.numeric(trait.matrix)[perm.list],nrow=nrow(trait.matrix),ncol=ncol(trait.matrix),byrow=TRUE)
+                                 return(perm.data)
+    }
+
+    
+    ###calculates R-squared as in a linear model
+    R.sq <- function(x,y){return(cor(x,y,use="na.or.complete")^2)}
+    
+      
+
+
+    
+    
diff --git a/functions/Data_preparation.R b/functions/Data_preparation.R
new file mode 100644
index 0000000..95b9084
--- /dev/null
+++ b/functions/Data_preparation.R
@@ -0,0 +1,113 @@
+###Wrongly labeled sample function, takes eQTL table output
+###v1.1 Fixed marker calling, now markers are re-calculated based on the population used (so the known genetic
+###     information about the population).
+    eQTL.WLS <- function(eQTL.table.output,mean.centered.data,strain.map,strain.marker){
+                         if(missing(eQTL.table.output)){                               stop("need output from eQTL.table")}
+                         if(missing(mean.centered.data)){                              stop("need mean centered data: columns: trait, strain, ratio, sample")}
+                         if(missing(strain.map)){                                      stop("strain.map missing; need the population map")}
+                         if(missing(strain.marker)){                                   stop("strain.marker missing; need information about the markers: name, chromosome, and basepair")}
+
+                         ###Force strain.marker column names into format
+                         if(ncol(strain.marker) != 3){                             stop("need 3 columns in strain.marker")}
+                         colnames(strain.marker) <- c("name","chromosome","basepair")
+                         strain.marker <- mutate(strain.marker,marker=1:nrow(strain.map))
+
+                         ###Force mean.centered.data column names into format
+                         if(ncol(mean.centered.data) != 4){                             stop("need 4 columns in mean.centered.data")}
+                         colnames(mean.centered.data) <- c("trait", "strain", "ratio", "sample")
+
+                         ###Convert genotypes to list
+                         geno.list <- mutate(as.data.frame(strain.map),marker=c(1:nrow(strain.map))) %>%
+                                      gather(key=strain_genotype,value=genotype,-marker) %>%
+                                      filter(!is.na(genotype))
+
+                         ###Take only the cis eQTL
+                         eQTL.table.output <- filter(eQTL.table.output,qtl_type=="cis") %>%
+                                              select(trait,qtl_chromosome,qtl_bp,qtl_significance,qtl_effect,gene_chromosome,gene_bp,gene_WBID) %>%
+                                              merge(strain.marker,by.x=2,by.y=2) %>%
+                                              group_by(trait,qtl_significance,qtl_effect) %>%
+                                              mutate(closeness=abs(qtl_bp-basepair)) %>%
+                                              mutate(closest=ifelse(closeness==min(closeness),"yep","nope")) %>%
+                                              filter(!duplicated(closest),closest=="yep") %>%
+                                              as.data.frame() %>%
+                                              select(marker,trait,qtl_chromosome,qtl_bp,qtl_significance,qtl_effect,gene_chromosome,gene_bp,gene_WBID) %>%
+                                              merge(mean.centered.data)
+
+                         ###Use the cis QTL to calculate the correlation between the predicted (eQTL) effect versus the expression in the strain
+                         out <- NULL
+                         for(i in 1:length(unique(geno.list$strain_genotype))){
+                             out.tmp <- merge(eQTL.table.output,filter(geno.list,strain_genotype==unique(geno.list$strain_genotype)[i])) %>%
+                                        group_by(sample,strain,strain_genotype) %>%
+                                        summarise(correlation = cor(x=genotype*ratio,y=qtl_effect,use="complete.obs")) %>%
+                                        as.data.frame()
+                             out <- rbind(out,out.tmp)
+                         }
+                         out <- group_by(out,sample) %>%
+                                mutate(rank.map=length(correlation)-rank(correlation)+1) %>%
+                                as.data.frame()
+
+                         ###Return the correlations
+                         return(out)
+                        }
+
+###Plotting function for WLS data
+    plot.eQTL.WLS <- function(eQTL.WLS.output,filename){
+                              if(missing(filename )){                               filename <- "WLS"}
+
+                              ###Write a pdf with the correlations for manual inspection
+                              pdf(paste(filename,".pdf",sep=""),width=18,height=8)
+                                  for(i in 1:length(unique(eQTL.WLS.output$sample))){
+                                      to.plot <- filter(eQTL.WLS.output,sample==unique(eQTL.WLS.output$sample)[i])
+                                      plot(y=to.plot$correlation,x=to.plot$rank.map,main=unique(eQTL.WLS.output$sample)[i],axes=F,xlab="Predicted strain",ylab="Correlation",pch=19,col=rgb(0,0,0,alpha=0.5))
+                                      axis(1,c(1:nrow(to.plot)),labels=to.plot$strain_genotype[order(to.plot$rank.map)],las=2,cex.axis=0.5)
+                                      axis(2,c(-1,-0.5,0,0.5,1),c(-1,-0.5,0,0.5,1),las=2)
+                                      text(y=seq(max(to.plot$correlation),mean(to.plot$correlation),length=10),x=0.75*nrow(to.plot),labels=apply(to.plot[order(to.plot$rank.map),][1:10,c(3,4)],1,paste,collapse=" "))
+                                      box()
+                                  }
+                              dev.off()
+                             }
+
+
+
+###Data preparation
+    QTL.data.prep <- function(trait.matrix,strain.trait,strain.map,strain.marker){
+                              if(missing(trait.matrix)){                             stop("requires trait data, traits per row and strains per column")}
+                              if(missing(strain.trait)){                             stop("requires the strain names matching the trait data")}
+                              if(missing(strain.map)){                               stop("requires the genetic map of the population")}
+                              if(missing(strain.marker)){                            stop("requires the marker info (name, chromosome, basepair) of the map")}
+                              if(ncol(trait.matrix) != length(strain.trait)){        stop("the number of strains does not correspond with the trait matrix")}
+                              if(sum(tolower(strain.trait) %in% tolower(colnames(strain.map))) < length(strain.trait)){
+                                warning("not all strains are in the strain.map file")}
+                              if(nrow(strain.map) != nrow(strain.marker)){           stop("the number of markers (strain.map) does not equal the number of marker descriptions (strain.marker)")}
+                              if(ncol(strain.marker) != 3){                          stop("make sure the strain.marker file contains a column with the marker name, chromosome, and bp location")}
+                              strain.map <- data.matrix(strain.map)
+
+                              ###Make matrix
+                              map.nu <- matrix(NA,nrow=nrow(strain.map),ncol=length(strain.trait))
+                              for(j in 1:length(strain.trait)){
+                                  map.nu[,j] <- strain.map[,tolower(colnames(strain.map)) == tolower(strain.trait[j])]
+                              }
+                              colnames(map.nu) <- strain.trait
+                              rownames(map.nu) <- rownames(strain.map)
+
+                              ###add rownames to the trait matrix
+                              if(length(rownames(trait.matrix)) == 0){
+                                  rownames(trait.matrix) <- 1:nrow(trait.matrix)
+                              }
+
+                              ###Make output
+                              output <- NULL
+                              output[[1]] <- trait.matrix
+                              output[[2]] <- map.nu
+                              output[[3]] <- data.frame(strain.marker)
+                              names(output) <- c("Trait","Map","Marker")
+
+                              ###return output
+                              return(output)
+                             }
+
+
+
+    
+
+    
\ No newline at end of file
diff --git a/functions/Heritabilities.R b/functions/Heritabilities.R
new file mode 100644
index 0000000..bf0fcd2
--- /dev/null
+++ b/functions/Heritabilities.R
@@ -0,0 +1,312 @@
+    ###calculates H2 based on REML (part of H2.broad)
+    H2.REML <- function(input,Vg.factor){
+                        if(missing(Vg.factor)){Vg.factor <- 1}
+      
+                        REML <- lmer(trait.value ~ 1 + (1|strain),data=input)
+                        Variations <- as.data.frame(VarCorr(REML))$vcov
+    
+                        H2 <- c((Variations[1]*Vg.factor)/sum(Variations*c(Vg.factor,1)),(Variations*c(Vg.factor,1)))
+                            names(H2) <- c("H2_REML","Vg","Ve")
+                        return(H2)
+                       }
+
+    ###Calculates H2 based on ANOVA (part of H2.broad)  
+    H2.ANOVA <- function(input,Vg.factor){
+                         if(missing(Vg.factor)){Vg.factor <- 1}
+      
+                         ANOVA <- anova(lm(trait.value~strain,data=input))
+                         Variations <- ANOVA[[2]]
+    
+                         H2 <- c((Variations[1]*Vg.factor)/sum(Variations*c(Vg.factor,1)),(Variations*c(Vg.factor,1)))
+                             names(H2) <- c("H2_ANOVA","Vg","Ve")
+                         return(H2)
+                        }
+
+    ###Calculates H2 as described in Keurentjes, 2007, in case error has to be estimated on limited number of strains
+    ###Permutation is based on Brem, 2005
+    H2.Keurentjes <- function(input,parental.strain,Vg.factor){
+                              if(missing(Vg.factor)){Vg.factor <- 1}
+      
+                              selc.parental <- input$strain %in% parental.strain
+    
+                              ###Ve is estimated based on parental replicates
+                              VAR.PL <- tapply(input$trait.value[selc.parental],as.character(unlist(input$strain[selc.parental])),var)
+                              N.PL <- tapply(input$trait.value[selc.parental],as.character(unlist(input$strain[selc.parental])),length)
+                              VAR.E <- sum(VAR.PL*(N.PL-1))/sum((N.PL-1))
+    
+                              ##Vt is estimated based on RILs
+                              VAR.T <- var(input$trait.value[!selc.parental])
+    
+                              H2 <- c(((VAR.T-VAR.E)*Vg.factor)/VAR.T,((VAR.T-VAR.E)*Vg.factor),VAR.E)
+                                  names(H2) <- c("H2_keurentjes","Vg","Ve")
+                              return(H2)
+                             }
+        
+    ###Wrapper function for broad-sense heritability
+    H2.broad <- function(trait.value,strain,trait,method,parental.strain,n.perm,Vg.factor){
+                         if(missing(trait.value)){                                  stop("requires trait value")}
+                         if(missing(strain)){                                       stop("requires strain values")}
+                         if(missing(trait)){                                        trait <- rep("A",length(trait.value))}
+                         if(missing(method)){                                       method <- "ANOVA"}
+                         if(!method %in% c("ANOVA","REML","Keurentjes_2007")){      stop("method should be either ANOVA, REML, or Keurentjes_2007")}
+                         if(method=="Keurentjes_2007"){
+                             if(missing(parental.strain)){                          stop("when using Keurentjes_2007, parental strain names should be provided")}
+                             if(sum(parental.strain %in% strain) != length(parental.strain)){
+                                                                                    stop("not all parental strains are in the data")}
+                         }
+                         if(missing(n.perm)){                                       n.perm <- 1000}
+                         if(missing(Vg.factor)){                                    Vg.factor <- 1}
+
+                         ###Prep the data
+                         input <- data.frame(cbind(trait.value=trait.value,strain=strain))
+                             input$trait.value <- as.numeric(as.character(unlist(input$trait.value)))
+                             input$strain <- as.factor(as.character(unlist(input$strain)))
+
+                         input <- split(input,trait)
+
+                         if(method == "ANOVA"){
+                             H2 <- t(sapply(input,H2.ANOVA,Vg.factor))
+
+                             perm <- NULL
+                             for(i in 1:n.perm){
+                                 input.perm <- lapply(input,function(x){x[,1] <- x[order(runif(nrow(x))),1];return(x)})
+                                 H2.perm <- t(sapply(input.perm,H2.ANOVA,Vg.factor))
+                                 perm <- rbind(perm,H2.perm[,1])
+                             }
+
+                             H2 <- cbind(H2,FDR0.05_ANOVA=apply(perm,2,function(x){sort(x)[ceiling(n.perm*0.95)]}))
+                         }
+                         if(method == "REML"){
+                             H2 <- t(sapply(input,H2.REML,Vg.factor))
+
+                             perm <- NULL
+                             for(i in 1:n.perm){
+                                 input.perm <- lapply(input,function(x){x[,1] <- x[order(runif(nrow(x))),1];return(x)})
+                                 H2.perm <- t(sapply(input.perm,H2.REML,Vg.factor))
+                                 perm <- rbind(perm,H2.perm[,1])
+                             }
+
+                             H2 <- cbind(H2,FDR0.05_REML=apply(perm,2,function(x){sort(x)[ceiling(n.perm*0.95)]}))
+                         }
+                         if(method == "Keurentjes_2007"){
+                             H2 <- t(sapply(input,H2.Keurentjes,parental.strain = parental.strain,Vg.factor))
+                             
+                             perm <- NULL
+                             for(i in 1:n.perm){
+                                 input.perm <- lapply(input,function(x){x[,1] <- x[order(runif(nrow(x))),1];return(x)})
+                                 H2.perm <- t(sapply(input.perm,H2.Keurentjes,parental.strain = parental.strain,Vg.factor))
+                                 perm <- rbind(perm,H2.perm[,1])
+                             }                             
+                             
+                             H2 <- cbind(H2,FDR0.05_Keurentjes=apply(perm,2,function(x){sort(x)[ceiling(n.perm*0.95)]}))
+                         }
+
+                         output <- as.data.frame(cbind(trait=names(input),round(H2,digits=3)))
+                             output[,1] <- as.character(unlist(output[,1]))
+                             output[,2] <- as.numeric(as.character(unlist(output[,2])))
+                             output[,3] <- as.numeric(as.character(unlist(output[,3])))
+                             output[,4] <- as.numeric(as.character(unlist(output[,4])))
+                             output[,5] <- as.numeric(as.character(unlist(output[,5])))
+
+                         return(output)
+                        }
+
+
+    
+      
+    ###Transgression, calculated and permutated as in Brem, 2005
+    transgression.calc <- function(trait.value,strain,trait,parental.strain,sigmas,n.perm){
+                                  if(missing(trait.value)){                                 stop("provide trait values")}
+                                  if(missing(strain)){                                      stop("provide strain names")}
+                                  if(missing(trait)){                                       trait <- rep("A",length(trait_value))}
+                                  if(missing(parental.strain)){                             stop("provide names of two parental strains")}
+                                  if(missing(sigmas)){                                      sigmas <- 2}
+                                  if(missing(n.perm)){                                      n.perm <- 1000}
+
+                                  ###Convenience function for transgression
+                                  transg.sapply <- function(input,parental.strain,sigmas){
+                                                            selc.parental1 <- tolower(input$strain) %in% tolower(parental.strain[1])
+                                                            selc.parental2 <- tolower(input$strain) %in% tolower(parental.strain[2])
+
+                                                            VAR.PL <- tapply(input$trait.value[(selc.parental1|selc.parental2)],input$strain[(selc.parental1|selc.parental2)],var)
+                                                            N.PL <- tapply(input$trait.value[(selc.parental1|selc.parental2)],input$strain[(selc.parental1|selc.parental2)],length)
+                                                            SD.PL <- (sum(VAR.PL*(N.PL-1))/sum((N.PL-1)))^2
+
+                                                            MEAN.PL1 <- mean(input$trait.value[selc.parental1],na.rm=T)
+                                                            MEAN.PL2 <- mean(input$trait.value[selc.parental2],na.rm=T)
+
+                                                            tmp <- tapply(input$trait.value[!(selc.parental1|selc.parental2)],input$strain[!(selc.parental1|selc.parental2)],mean,na.rm=T)
+
+                                                            transg <- sum(tmp < (min(c(MEAN.PL1,MEAN.PL2))-sigmas*SD.PL) |
+                                                                          tmp > (max(c(MEAN.PL1,MEAN.PL2))+sigmas*SD.PL))
+
+                                                            return(transg)
+                                                           }
+
+                                  ###Prep the data
+                                  input <- data.frame(cbind(trait.value=trait.value,strain=strain))
+                                      input$trait.value <- as.numeric(as.character(unlist(input$trait.value)))
+                                      input$strain <- as.character(unlist(input$strain))
+
+                                  input <- split(input,trait)
+
+                                  transg <- sapply(input,transg.sapply,parental.strain = parental.strain, sigmas = sigmas)
+
+                                  perm <- NULL
+                                  for(i in 1:n.perm){
+                                      input.perm <- lapply(input,function(x){x[,1] <- x[order(runif(nrow(x))),1];return(x)})
+                                      transg.perm <- sapply(input.perm,transg.sapply,parental.strain = parental.strain, sigmas = sigmas)
+                                      perm <- rbind(perm,transg.perm)
+                                  }
+                                  FDR0.1 <- apply(perm,2,function(x){sort(x)[ceiling(n.perm*0.90)]})
+                                  FDR0.05 <- apply(perm,2,function(x){sort(x)[ceiling(n.perm*0.95)]})
+                                  FDR0.01 <- apply(perm,2,function(x){sort(x)[ceiling(n.perm*0.99)]})
+
+                                  output <- as.data.frame(cbind(trait=names(input),n_strains_transgression=transg,FDR0.1=FDR0.1,FDR0.05=FDR0.05,FDR0.01=FDR0.01))
+                                      output[,1] <- as.character(unlist(output[,1]))
+                                      output[,2] <- as.numeric(as.character(unlist(output[,2])))
+                                      output[,3] <- as.numeric(as.character(unlist(output[,3])))
+                                      output[,4] <- as.numeric(as.character(unlist(output[,4])))
+                                      output[,5] <- as.numeric(as.character(unlist(output[,5])))
+                                  return(output)
+                                 }
+
+
+       
+    
+    
+    ###Narrow sense heritabilities as calculated in Rockman, 2010. (see also Speed et al, 2012, American journal of human genetics).
+        h2.emma <- function(input,strain.map,kinship.matrix,Vg.factor){
+                            if(missing(input)){                         stop("provide a vector with the phenotype, matching the strain.map")}
+                            if(missing(strain.map)){                    stop("provide a matrix with genotypes (encoded as -1, 1), columns strains, rows markers")}
+                            if(ncol(strain.map) != length(input)){        stop("phenotype vector does not match strain.map")}
+                            if(missing(kinship.matrix)){
+                                strain.map[strain.map==0 & !is.na(strain.map)] <- 0.5
+                                strain.map[strain.map==-1 & !is.na(strain.map)] <- 0
+                                strain.map[is.na(strain.map)] <- 0.5
+                                kinship.matrix <- emma.kinship(snps=strain.map)
+                            }
+                            if(missing(Vg.factor)){                     Vg.factor <- 1}
+    
+                            ###Calculate Vg and Ve
+                                emmaREML <- emma.REML.t(ys=input,xs=strain.map,K=kinship.matrix)
+    
+                            ###Calculate heritability
+                                Vg <- sum(emmaREML$vgs,na.rm=T)
+                                Ve <- sum(emmaREML$ves,na.rm=T)
+    
+                                h2 <- c((Vg*Vg.factor)/((Vg*Vg.factor)+Ve),Vg,Ve)
+                                names(h2) <- c("h2_emma","Vg","Ve")
+    
+                            return(h2)
+                           }
+        
+    ###narrow sense heritabilities as calculated by heritability package, from Kruijer, Boer & Malosetti, 2015
+        h2.REML <- function(input,strain.names,kinship.matrix,Vg.factor){
+                            if(missing(input)){                           stop("provide a vector with the phenotype, matching the strain.map")}
+                            if(missing(strain.names)){                    stop("provide a matrix with strain names matching the phenotype")}
+                            if(length(strain.names) != length(input)){    stop("phenotype vector does not match strain.map")}
+                            if(missing(Vg.factor)){                       Vg.factor <- 1}
+    
+                            ###Calculate Vg and Ve
+                                markerREML <- tryCatch({marker_h2(data.vector=input,geno.vector=strain.names,K=kinship.matrix,max.iter=40)}, 
+                                                       error=function(e){cat("ERROR :",conditionMessage(e), "\n"); markerREML <- data.frame(cbind(va=NA,ve=NA)); return(markerREML)})
+
+                            ###Calculate heritability
+                                Vg <- markerREML$va
+                                Ve <- markerREML$ve
+    
+                                h2 <- c((Vg*Vg.factor)/((Vg*Vg.factor)+Ve),Vg,Ve)
+                                names(h2) <- c("h2_REML","Vg","Ve")
+    
+                            return(h2)
+                           }       
+        
+
+    h2.narrow <- function(map1.output,method,n.perm,Vg.factor,small){
+                         if(missing(map1.output)){                                  stop("requires map1 file")}
+                         if(missing(method)){                                       method <- "REML"}
+                         if(!method %in% c("emma","REML")){                         stop("only emma and REML are currently supported")}
+                         if(missing(n.perm)){                                       n.perm <- 1000}
+                         if(missing(Vg.factor)){                                    Vg.factor <- 1}
+                         if(missing(small)){                                        small <- FALSE}
+      
+                         if(class(map1.output[[1]])=="numeric"){
+                             small <- TRUE
+                         }
+      
+                         if(small){
+                             map1.output$Trait[2,] <- map1.output$Trait[1,]
+                         }
+      
+                         ###start calculating
+                         if(method == "emma"){
+                             ###calculate kinship (handy in case of many traits)
+                             tmp <- map1.output$Map
+                             tmp[tmp==0 & !is.na(tmp)] <- 0.5
+                             tmp[tmp==-1 & !is.na(tmp)] <- 0
+                             tmp[is.na(tmp)] <- 0.5
+                             kinship.matrix <- emma.kinship(snps=tmp)
+                             
+                             h2 <- t(apply(map1.output$Trait,1,h2.emma,strain.map=map1.output$Map,kinship.matrix=kinship.matrix,Vg.factor=Vg.factor))
+
+                             ###permutation
+                             perm <- NULL
+                             
+                             map1.perm <- as.list(NULL)
+                             map1.perm[[1]] <- map1.output$Trait
+                             map1.perm[[2]] <- map1.output$Map
+                             names(map1.perm) <- c("Trait","Map")
+                             
+                             for(i in 1:n.perm){
+                                 map1.perm$Trait <- t(apply(map1.perm$Trait,1,function(x){x <- x[order(runif(length(x)))];return(x)}))
+                                 h2.perm <- t(apply(map1.perm$Trait,1,h2.emma,strain.map=map1.perm$Map,kinship.matrix=kinship.matrix,Vg.factor=Vg.factor))
+                                 perm <- rbind(perm,h2.perm[,1])
+                             }
+
+                             h2 <- cbind(h2,FDR0.05_emma=apply(perm,2,function(x){sort(x)[ceiling(n.perm*0.95)]}))
+                         }
+      
+                         if(method == "REML"){
+                             ###calculate kinship (handy in case of many traits)
+                             tmp <- map1.output$Map
+                             tmp[tmp==0 & !is.na(tmp)] <- 0.5
+                             tmp[tmp==-1 & !is.na(tmp)] <- 0
+                             tmp[is.na(tmp)] <- 0.5
+                             kinship.matrix <- emma.kinship(snps=tmp)
+                                 rownames(kinship.matrix) <- colnames(map1.output$Map)
+                                 colnames(kinship.matrix) <- colnames(map1.output$Map)
+
+                                 h2 <- t(apply(map1.output$Trait,1,h2.REML,strain.names=colnames(map1.output$Map),kinship.matrix=kinship.matrix,Vg.factor=Vg.factor))
+
+                             ###permutation
+                             perm <- NULL
+                             
+                             map1.perm <- as.list(NULL)
+                             map1.perm[[1]] <- map1.output$Trait
+                             map1.perm[[2]] <- map1.output$Map
+                             names(map1.perm) <- c("Trait","Map")
+                             
+                             for(i in 1:n.perm){
+                                 map1.perm$Trait <- t(apply(map1.perm$Trait,1,function(x){x <- x[order(runif(length(x)))];return(x)}))
+                                 h2.perm <- t(apply(map1.perm$Trait,1,h2.REML,strain.names=colnames(map1.perm$Map),kinship.matrix=kinship.matrix,Vg.factor=Vg.factor))
+                                 perm <- rbind(perm,h2.perm[,1])
+                             }
+
+                             h2 <- cbind(h2,FDR0.05_REML=apply(perm,2,function(x){sort(x,na.last = FALSE)[ceiling(n.perm*0.95)]}))
+                         }        
+                         output <- as.data.frame(cbind(trait=rownames(h2),round(h2,digits=5)))
+                             output[,1] <- as.character(unlist(output[,1]))
+                             output[,2] <- as.numeric(as.character(unlist(output[,2])))
+                             output[,3] <- as.numeric(as.character(unlist(output[,3])))
+                             output[,4] <- as.numeric(as.character(unlist(output[,4])))
+                             output[,5] <- as.numeric(as.character(unlist(output[,5])))
+
+                         if(small){
+                             output <- output[1,]
+                         }
+                         return(output)
+                        }
+
+    
+        
\ No newline at end of file
diff --git a/functions/QTL_mapping.R b/functions/QTL_mapping.R
new file mode 100644
index 0000000..84401f3
--- /dev/null
+++ b/functions/QTL_mapping.R
@@ -0,0 +1,448 @@
+###eQTL mapping functions
+    ### Map function, 1 marker, genotypes per column, trait per row
+    ### map.1 is optimized for dataset in which many markers are un-informative
+    ### these markers are not mapped but the p-vals and effect are copied from the previous marker.
+    map.1 <- function(trait.matrix,strain.map,strain.marker){
+                      if(missing(trait.matrix)){                                      stop("specify trait.matrix, matrix with traits in rows and genotypes in columns")}
+                      if(missing(strain.map)){                                        stop("specify strain.map (genetic map)")}
+                      if(ncol(strain.map) !=  dim(as.matrix(trait.matrix))[2] &
+                         ncol(strain.map) !=  prod(dim(as.matrix(trait.matrix)))){    stop("number of strains is not equal to number of genotypes in map")}
+                      if(missing(strain.marker)){                                     strain.marker <- data.frame(cbind(name=1:dim(strain.map)[1],chromosome=NA,bp=1:dim(strain.map)[1]))
+                                                                                      rownames(strain.marker) <- 1:dim(strain.map)[1]}
+
+                      ###Check for NAs
+                      NAs <- sum(is.na(trait.matrix))>0 | sum(is.na(strain.map))>0
+
+                      small <- FALSE
+                      if(dim(as.matrix(trait.matrix))[1] ==1){
+                          trait.matrix <- rbind(trait.matrix,1)
+                          small <- TRUE
+                      }
+
+                      eff.out <- matrix(NA,nrow(trait.matrix),nrow(strain.map))
+                      pval.out <- matrix(NA,nrow(trait.matrix),nrow(strain.map))
+
+                      for(i in 1:nrow(strain.map)){
+                          noseg <- length(unique(strain.map[i,])) ==1
+                          if(noseg){
+                              output.tmp <- matrix(c(0,0),byrow=TRUE,ncol=2,nrow=nrow(trait.matrix))
+                          }
+                          if( i == 1 & !noseg){
+                              if(!NAs){output.tmp <- fast.lm.1(trait.matrix,strain.map[i,])}
+                              if(NAs){output.tmp <- lm.1(trait.matrix,strain.map[i,])}
+                          }
+                          if( i != 1 & sum(abs(strain.map[i-1,]-strain.map[i,]),na.rm=T) != 0 & !noseg){
+                              if(!NAs){output.tmp <- fast.lm.1(trait.matrix,strain.map[i,])}
+                              if(NAs){output.tmp <- lm.1(trait.matrix,strain.map[i,])}
+                          }
+                          if( i != 1 & sum(abs(strain.map[i-1,]-strain.map[i,]),na.rm=T) == 0 & !noseg){
+                              output.tmp <- output.tmp
+                          }
+                          eff.out[,i] <- output.tmp[,2]
+                          pval.out[,i] <- output.tmp[,1]
+                      }
+                      if(!small){
+                        colnames(eff.out) <- rownames(strain.marker)
+                        rownames(eff.out) <- rownames(trait.matrix)
+                        colnames(pval.out) <- rownames(strain.marker)
+                        rownames(pval.out) <- rownames(trait.matrix)
+                      }
+                      if(small){
+                          traitnames <- rownames(trait.matrix)[1]
+                          eff.out <- matrix(eff.out[1,],nrow=1)
+                              rownames(eff.out) <- traitnames
+                              colnames(eff.out) <- rownames(strain.map)
+                          pval.out <- matrix(pval.out[1,],nrow=1)
+                              rownames(pval.out) <- traitnames
+                              colnames(pval.out) <- rownames(strain.map)
+                          trait.matrix <- matrix(trait.matrix[1,],nrow=1)
+                              rownames(trait.matrix) <- traitnames
+                              colnames(trait.matrix) <- colnames(strain.map)
+                      }
+
+                      output <- NULL; output <- as.list(output)
+                      output[[1]] <- round(pval.out,digits=2)
+                      output[[2]] <- round(eff.out,digits=3)
+                      output[[3]] <- trait.matrix
+                      output[[4]] <- strain.map
+                      output[[5]] <- strain.marker
+                      names(output) <- c("LOD","Effect","Trait","Map","Marker")
+                      return(output)
+                     }
+
+
+
+
+
+###Process the mapping data
+    ###This transforms the relevant output to a dataframe, making handling in dplyr and plotting using ggplot easier
+
+    ###This takes output of map.1 functions (list file with LOD, effect, Trait, and Map
+    ###And converts it to a list.
+    ###Optimized (a bit) by generating an empty matrix and filling the columns one-by-one.
+    ###added an option for single traits
+    mapping.to.list <- function(map1.output,small){
+                                if(missing(map1.output)){        stop("Missing map1.output ([[LOD]],[[Effect]],[[Trait]],[[Map]],[[Marker]])")}
+                                if(missing(small)){              small <- FALSE}
+      
+                                if(class(map1.output[[1]])=="numeric"){
+                                    small <- TRUE
+                                }
+ 
+                                if(small){
+                                    ###Split up to long format
+                                    output <- matrix(NA,length(map1.output[[1]]),6)
+                                    output[,1] <- rep(rownames(map1.output[[3]])[1],each=length(map1.output[[1]]))
+                                    output[,2] <- as.character(unlist(map1.output[[5]][,2]))
+                                    output[,3] <- as.numeric(as.character(unlist(map1.output[[5]][,3])))
+                                    output[,4] <- c(1:length(map1.output[[1]]))
+                                    output[,5] <- c(map1.output[[1]])
+                                    output[,6] <- c(map1.output[[2]])
+                                    output <- as.data.frame(output)
+                                }     
+                                if(!small){
+                                    ###Split up to long format
+                                    output <- matrix(NA,ncol(map1.output[[1]])*nrow(map1.output[[1]]),6)
+                                    output[,1] <- rep(rownames(map1.output[[1]]),each=ncol(map1.output[[1]]))
+                                    output[,2] <- rep(as.character(unlist(map1.output[[5]][,2])),times=nrow(map1.output[[1]]))
+                                    output[,3] <- rep(as.numeric(as.character(unlist(map1.output[[5]][,3]))),times=nrow(map1.output[[1]]))
+                                    output[,4] <- rep(c(1:ncol(map1.output[[1]])),times=nrow(map1.output[[1]]))
+                                    output[,5] <- c(t(map1.output[[1]]))
+                                    output[,6] <- c(t(map1.output[[2]]))
+                                    output <- as.data.frame(output)
+                                }
+
+                                ###overwrite dataframe formats
+                                output[,1] <- as.character(unlist(output[,1]))
+                                output[,2] <- as.character(unlist(output[,2]))
+                                output[,3] <- as.numeric(as.character(unlist(output[,3])))
+                                output[,4] <- as.numeric(as.character(unlist(output[,4])))
+                                output[,5] <- as.numeric(as.character(unlist(output[,5])))
+                                output[,6] <- as.numeric(as.character(unlist(output[,6])))
+                                colnames(output) <- c("trait","qtl_chromosome","qtl_bp","qtl_marker","qtl_significance","qtl_effect")
+
+                                ###Return
+                                return(output)
+                               }
+
+
+    ###This takes output of mapping.to.list function and identiefies peaks and confidence intervals v1.3
+    ###Based on a given threshold and a 1.5 LOD-drop.
+
+    ###v1.2 made the function more efficient, the peak borders are selected in one processing
+    ###v1.3 fixed the problem calling the peak if the peak borders the edge of the chromosome
+    ###v1.4 fixed larger then, smaller then bug
+    ###v1.5 fixed 'incompatible types' bug; NA is not recognized as numeric
+
+
+    peak.finder <- function(list.file,threshold,LOD.drop){
+                            if(missing(list.file)){            stop("Missing list.file (trait, qtl_chromosome, qtl_bp, qtl_marker, qtl_significance, qtl_effect)")}
+                            if(missing(threshold)){            stop("Missing threshold")}
+                            if(missing(LOD.drop)){             LOD.drop <- 1.5}
+
+                            ###locate the peak
+                            peaks <- dplyr::filter(list.file, qtl_significance >= threshold-LOD.drop) %>%
+                                     dplyr::group_by(trait,qtl_chromosome) %>%
+                                     dplyr::filter(qtl_significance == max(qtl_significance),max(qtl_significance)>=threshold) %>%
+                                     dplyr::filter(abs(qtl_marker - mean(qtl_marker)) == min(abs(qtl_marker - mean(qtl_marker)))) %>%
+                                     dplyr::filter(qtl_marker == min(qtl_marker)) %>%
+                                     as.data.frame() %>%
+                                     dplyr::mutate(qtl_peak=qtl_marker)
+
+                            ###locate left boundary
+                            peaks_left <- dplyr::filter(list.file,trait %in% unique(peaks$trait)) %>%
+                                          dplyr::group_by(trait,qtl_chromosome) %>%
+                                          dplyr::mutate(Border=ifelse((qtl_significance < max(qtl_significance)-LOD.drop & max(qtl_significance)>=threshold),T,
+                                                                 ifelse((qtl_bp == min(qtl_bp) & min(qtl_bp) %in% qtl_bp[qtl_significance >= max(qtl_significance)-LOD.drop & max(qtl_significance)>=threshold]),T,
+                                                                   ifelse((qtl_bp == max(qtl_bp) & max(qtl_bp) %in% qtl_bp[qtl_significance >= max(qtl_significance)-LOD.drop & max(qtl_significance)>=threshold]),T,F))),
+                                                        peakloc=qtl_marker[which.max(qtl_significance)]) %>%
+                                          dplyr::filter(Border) %>%
+                                          dplyr::mutate(qtl_marker_left=as.numeric(ifelse(qtl_marker == max(qtl_marker[qtl_marker <= peakloc]),qtl_marker,NA)),
+                                                        qtl_bp_left=as.numeric(ifelse(qtl_bp == max(qtl_bp[qtl_marker <= peakloc]),qtl_bp,NA)),
+                                                        qtl_marker_right=as.numeric(ifelse(qtl_marker == min(qtl_marker[qtl_marker >= peakloc]),qtl_marker,NA)),
+                                                        qtl_bp_right=as.numeric(ifelse(qtl_bp == min(qtl_bp[qtl_marker >= peakloc]),qtl_bp,NA))) %>%
+                                          dplyr::filter(!is.na(qtl_bp_left) | !is.na(qtl_bp_right)) %>%
+                                          dplyr::select(-Border,-peakloc)
+
+                            peaks_right <- dplyr::filter(peaks_left,!is.na(qtl_bp_right))
+                            peaks_left <- dplyr::filter(peaks_left,!is.na(qtl_bp_left))
+
+                            ###Merge the findings
+                            peaks <- cbind(peaks,peaks_left$qtl_marker_left,peaks_left$qtl_bp_left,peaks_right$qtl_marker_right,peaks_right$qtl_bp_right)
+                            ###Prepare input
+                            output <- dplyr::mutate(list.file,qtl_peak = NA,qtl_marker_left = NA,qtl_bp_left = NA,qtl_marker_right = NA,qtl_bp_right = NA)
+                            ###Merge peaks and input
+                            output[match(paste(peaks[,1],peaks[,2],peaks[,3]),paste(output[,1],output[,2],output[,3])),7:11] <- peaks[,7:11]
+                            output <- dplyr::mutate(output,qtl_bp_left=ifelse(qtl_bp_left > qtl_bp,qtl_bp,qtl_bp_left),qtl_bp_right=ifelse(qtl_bp_right<qtl_bp,qtl_bp,qtl_bp_right))
+
+                            ###return called peaks
+                            return(output)
+                           }
+
+
+    ###Makes a table with eQTL peaks v1.1
+        ###peak.list.file: output from peak.finder
+        ###trait.annotation: gene ID's of the mapped traits
+        ###cis.trans.window: Cis-trans windowd can be defined based on physical distance "physical", based on LOD-drop interval "LOD.drop", or on both "both"
+    
+    eQTL.table <- function(peak.list.file,trait.annotation,cis.trans.window,cis.trans.window.size,cis.trans.window.lodinterval,colnames.trait.annotations){
+                           if(missing(peak.list.file)){                                      stop("need an eQTL list file")}
+                           if(missing(trait.annotation)){                                    stop("need annotation file with order: trait, gene_chromosome, gene_bp, gene_WBID, gene_sequence_name, gene_public_name")}
+                           if(missing(cis.trans.window)){                                    cis.trans.window <- "both"} 
+                           if(!cis.trans.window %in% c("physical","LOD.drop","both")){       stop("cis.trans.window should be in physical, LOD.drop, or both")}
+                           if(missing(cis.trans.window.size)){                               cis.trans.window.size <- 1000000}
+                           if(missing(colnames.trait.annotations)){                          colnames.trait.annotations <- c("trait","gene_chromosome","gene_bp","gene_WBID","gene_sequence_name","gene_public_name")}
+
+                           ###Rename colnames traits
+                           colnames(trait.annotation) <- colnames.trait.annotations
+
+                           ###Take only the peaks
+                           if(cis.trans.window =="physical"){
+                               output <- dplyr::filter(peak.list.file,!is.na(qtl_peak)) %>%
+                                         merge(trait.annotation,by.x=1,by.y=1) %>%
+                                         dplyr::mutate(qtl_type = ifelse((qtl_chromosome==gene_chromosome & abs(qtl_bp-gene_bp) < cis.trans.window.size),"cis","trans"))
+                           }
+                      
+                           if(cis.trans.window =="LOD.drop"){
+                               output <- dplyr::filter(peak.list.file,!is.na(qtl_peak)) %>%
+                                         merge(trait.annotation,by.x=1,by.y=1) %>%
+                                         dplyr::mutate(qtl_type = ifelse((qtl_chromosome==gene_chromosome & (gene_bp > qtl_bp_left & gene_bp < qtl_bp_right)),"cis","trans"))
+                           }
+                           
+                           if(cis.trans.window =="both"){
+                               output <- dplyr::filter(peak.list.file,!is.na(qtl_peak)) %>%
+                                         merge(trait.annotation,by.x=1,by.y=1) %>%
+                                         dplyr::mutate(qtl_type = ifelse((qtl_chromosome==gene_chromosome & abs(qtl_bp-gene_bp) < cis.trans.window.size) |
+                                                                         (qtl_chromosome==gene_chromosome & (gene_bp > qtl_bp_left & gene_bp < qtl_bp_right)),"cis","trans"))
+                           }
+                           
+                           ###Return file
+                           return(output)
+                          }
+
+
+    ###Makes a table with eQTL peaks for plotting
+    ###this function adds points to faithfully indicate the chromosome boundaries (standard is C. elegans))
+    prep.ggplot.eQTL.table <- function(eQTL.table.file,condition,chromosomes,chromosome_size){
+                                       if(missing(eQTL.table.file)){                        stop("need a eQTL.table.file")}
+                                       if(missing(condition)){                              condition <- "ct"}
+                                       if(missing(chromosomes)){                            chromosomes <- c("I","II","III","IV","V","X")}
+                                       if(missing(chromosome_size)){                        chromosome_size <- c(15072434,15279421,13783801,17493829,20924180,17718942)}
+                                       if(length(chromosomes) != length(chromosome_size)){  stop("chromosome names and sizes should have the same length")}
+
+                                       output <- dplyr::filter(eQTL.table.file, gene_chromosome %in% chromosomes) %>%
+                                                 dplyr::mutate(gene_chromosome = factor(gene_chromosome,levels=rev(chromosomes))) %>%
+                                                 dplyr::mutate(qtl_chromosome = factor(qtl_chromosome,levels=chromosomes),condition=condition) %>%
+                                                 dplyr::mutate(qtl_type = factor(qtl_type,levels=rev(sort(unique(qtl_type)))))
+
+                                       ###Chromosome sizes cheat
+                                       cheatpoints <- output[rep(1,times=length(chromosomes)*2),]
+                                       for(i in which(unlist(lapply(output,class)) == "factor")){
+                                           cheatpoints[,i] <- as.character(unlist(cheatpoints[,i]))
+                                       }
+                                       cheatpoints[1:nrow(cheatpoints),unlist(lapply(output,class)) != "character"] <- 0
+                                       cheatpoints[1:nrow(cheatpoints),unlist(lapply(output,class)) != "numeric"] <- ""
+                                       cheatpoints$qtl_chromosome <- as.character(rep(chromosomes,times=2))
+                                       cheatpoints$gene_chromosome <- as.character(rep(chromosomes,times=2))
+                                       cheatpoints$qtl_bp <- as.numeric(as.character(c(chromosome_size,rep(1,times=length(chromosomes)))))
+                                       cheatpoints$gene_bp <- as.numeric(as.character(c(chromosome_size,rep(1,times=length(chromosomes)))))
+                                       cheatpoints$trait <- paste(as.character(rep(chromosomes,times=2)),rep(c(1,2),each=length(chromosomes)))
+
+
+                                       ###combine
+                                       output <- rbind(output,cheatpoints)
+
+                                       ###return
+                                       return(output)
+                                      }
+
+
+    ################################################################################################################################################################
+    ### Add.Rsquared(R2.map,QTL.prep.input) v1.1
+    ###   This function will add the sum of squares of the used model to a QTL table output, it also calculates the r-squared of the model
+    ###   obligatory input
+    ###     R2.map: output of the eQTL.table function (requires marker numbers and trait names (as in th eprep.for.QTL function))
+    ###     QTL.prep.input: output of the prep.for.QTL function (list, first matrix is the data, second matrix the corresponding map)
+    ### v1.1 fixed a bug where, if the marker was a factor, wrong data was selected
+    ### c("LOD","Effect","Trait","Map","Marker")
+
+    eQTL.table.addR2 <- function(eQTL.table.file,QTL.prep.file){
+                                 if(missing(eQTL.table.file)){                          stop("Give a QTL summary table, should contain marker locations (qtl_marker, qtl_marker_1, qtl_marker_2) and trait names")}
+                                 if(missing(QTL.prep.file)){                            stop("Give the input on which was mapped (output of QTL.data.prep)")}
+
+                                 ###True if single marker mapping
+                                 if("qtl_marker" %in% colnames(eQTL.table.file)){
+                                     output <- NULL
+                                     for(i in 1:nrow(eQTL.table.file)){
+                                         ###get marker
+                                         mrk <- QTL.prep.file$Map[as.numeric(as.character(unlist(eQTL.table.file$qtl_marker[i]))),]
+                                         ###get expression
+                                         y <- unlist(QTL.prep.file$Trait[rownames(QTL.prep.file$Trait)==as.character(unlist(eQTL.table.file$trait[i])),])
+                                         ###run model
+                                         output <- c(output,R.sq(y,mrk))
+                                     }
+                                     output <- data.frame(cbind(eQTL.table.file,qtl_R2_sm=output))
+                                 }
+
+                                 ###True if multiple marker mapping
+                                 if("marker_1" %in% colnames(eQTL.table.file) & "marker_2" %in% colnames(eQTL.table.file)){
+                                     output <- cbind(eQTL.table.file,SS_int_mrk1=NA,SS_int_mrk2=NA,SS_int=NA,SS_int_res=NA,SS_add_mrk1=NA,SS_add_mrk2=NA,SS_add_res=NA)
+                                     for(i in 1:nrow(eQTL.table.file)){
+                                         ###get marker
+                                         mrk1 <- QTL.prep.file$genotype.map[eQTL.table.file$marker_1[i],]
+                                         mrk2 <- QTL.prep.file$genotype.map[eQTL.table.file$marker_2[i],]
+                                         ###get expression
+                                         y <- unlist(QTL.prep.file$trait.matrix[rownames(QTL.prep.file$trait.matrix)==eQTL.table.file$trait[i],])
+                                         ###run model
+                                         output[i,colnames(output)%in%c("SS_int_mrk1","SS_int_mrk2","SS_int","SS_int_res")] <- anova(lm(y ~ mrk1*mrk2))$"Sum Sq"
+                                         output[i,colnames(output)%in%c("SS_add_mrk1","SS_add_mrk2","SS_add_res")] <- anova(lm(y ~ mrk1+mrk2))$"Sum Sq"
+
+                                     }
+                                     output <- dplyr::mutate(output,r2_add=(SS_add_mrk1+SS_add_mrk2)/(SS_add_mrk1+SS_add_mrk2+SS_add_res),r2_int=SS_int/(SS_int_mrk1+SS_int_mrk2+SS_int+SS_int_res))
+                                 }
+                                 return(output)
+                                }
+
+    
+    
+    prep.ggplot.QTL.profile <- function(QTL.peak.output,map1.output,trait){
+                                        output <- list()
+
+                                        output[[1]] <- QTL.peak.output[as.character(unlist(QTL.peak.output$trait))==trait,]
+                                            names(output)[1] <- "QTL_profile"
+                                        tmp <- output[[1]]$qtl_peak[!is.na(output[[1]]$qtl_peak)]
+
+                                        if(length(tmp)>0){
+                                            for(i in 1:length(tmp)){
+                                                output[[i+1]] <- cbind(trait=trait,
+                                                                       map1.output$Marker[tmp[i],],
+                                                                       strain=colnames(map1.output$Trait),
+                                                                       genotype=map1.output$Map[tmp[i],],
+                                                                       trait_value=map1.output$Trait[rownames(map1.output$Trait)==trait,],
+                                                                       R_squared=R.sq(x=map1.output$Map[tmp[i],],y=map1.output$Trait[rownames(map1.output$Trait)==trait,]))
+                                                names(output)[i+1] <- paste("QTLsplit_",as.character(unlist(map1.output$Marker[tmp[i],1])),sep="_")
+                                            }
+                                        }
+                                        return(output)
+                                       }
+
+
+    map.2 <- function(trait.matrix,strain.map,strain.marker,method){
+                      if(missing(trait.matrix)){                                      stop("specify trait.matrix, matrix with traits in rows and genotypes in columns")}
+                      if(missing(strain.map)){                                        stop("specify strain.map (genetic map)")}
+                      if(ncol(strain.map) !=  dim(as.matrix(trait.matrix))[2] &
+                         ncol(strain.map) !=  prod(dim(as.matrix(trait.matrix)))){    stop("number of strains is not equal to number of genotypes in map")}
+                      if(missing(strain.marker)){                                     strain.marker <- data.frame(cbind(name=1:dim(strain.map)[1],chromosome=NA,bp=1:dim(strain.map)[1]))
+                                                                                      rownames(strain.marker) <- 1:dim(strain.map)[1]}
+                      if(missing(method)){                                            method <- "Additive"}
+                      if(!method %in% c("Additive","Interaction")){                   stop("method should be Additive or Interaction")}
+
+
+                      small <- FALSE
+                      if(dim(as.matrix(trait.matrix))[1] ==1){
+                          trait.matrix <- rbind(trait.matrix,1)
+                          small <- TRUE
+                      }
+
+                      ###Check for NAs, and remove them (script is too slow)
+                      NAs <- sum(is.na(trait.matrix))>0 | sum(is.na(strain.map))>0  
+                      if(NAs){
+                          warning("NAs in trait.matrix will be replaced by the mean value of that trait, the NAs in strain.map will be replaced by 0")
+                          for(i in 1:nrow(trait.matrix)){
+                              trait.matrix[i,is.na(trait.matrix[i,])] <- mean(trait.matrix[,i],na.rm=T)
+                          }
+                          strain.map[is.na(strain.map)] <- 0                         
+                      }
+
+                      #map it
+                      variable1 <- cbind(as.numeric(t(strain.map)),rep(1:nrow(strain.map),each=ncol(strain.map)))
+                      tmp <- list()
+                      
+                      for(i in 1:nrow(strain.map)){
+                          variable2 <- strain.map[i,]
+                          if(method=="Additive"){
+                              tmp[[i]] <- tapply(variable1[,1],variable1[,2],fast.lm.2.add,traits=trait.matrix,variable2=variable2)
+                          }
+                          if(method =="Interaction"){
+                              tmp[[i]] <- tapply(variable1[,1],variable1[,2],fast.lm.2.int,traits=trait.matrix,variable2=variable2)
+                          }
+                          tmp[[i]] <- do.call("rbind",tmp[[i]])
+                          tmp[[i]] <- cbind(marker_1=rep(unique(variable1[,2]),each=nrow(trait.matrix)),marker_2=i,tmp[[i]])
+                      }
+                      
+                      tmp <- do.call("rbind",tmp) 
+                      
+                      output <- list()
+                      output[[1]] <- tmp
+                      output[[2]] <- trait.matrix
+                      output[[3]] <- strain.map
+                      output[[4]] <- strain.marker
+                      names(output) <- c(method,"Trait","Map","Marker")
+                                                                                        
+                      return(output)
+                     }  
+     
+                    
+     map.2.prune <- function(map.2.output,pval.min = 0.05,distance = 1e6,method = "fdr"){
+                             if(missing(map.2.output)){                         stop("require output from map.2")}
+
+                             if(ncol(map.2.output[[1]]) == 6){
+                                 map.2.output[[1]] <- map.2.output[[1]][map.2.output[[1]][,1] < map.2.output[[1]][,2],]
+                                 map.2.output[[1]] <- cbind(map.2.output[[1]],
+                                                            pval1_adj=round(-log10(p.adjust(10^-map.2.output[[1]][,3],method=method)),digits=2),
+                                                            pval2_adj=round(-log10(p.adjust(10^-map.2.output[[1]][,4],method=method)),digits=2))
+                                 map.2.output[[1]] <- map.2.output[[1]][(map.2.output[[1]][,3] > -log10(pval.min) | map.2.output[[1]][,4] > -log10(pval.min)) &
+                                                                        !is.na(map.2.output[[1]][,3]) & !is.na(map.2.output[[1]][,4]),]
+                                 map.2.output[[1]] <- data.frame(cbind(map.2.output[[1]],
+                                                                       trait=rownames(map.2.output[[1]]),
+                                                                       marker_1_chromosome=as.character(unlist(map.2.output$Marker[map.2.output[[1]][,1],2])),
+                                                                       marker_1_position=map.2.output$Marker[map.2.output[[1]][,1],3],
+                                                                       marker_2_chromosome=as.character(unlist(map.2.output$Marker[map.2.output[[1]][,2],2])),
+                                                                       marker_2_position=map.2.output$Marker[map.2.output[[1]][,2],3]))
+                                 for(i in c(1:8,11,13)){
+                                     map.2.output[[1]][,i] <- as.numeric(as.character(unlist(map.2.output[[1]][,i])))
+                                 }
+                                 for(i in c(9,10,12)){
+                                     map.2.output[[1]][,i] <- as.character(unlist(map.2.output[[1]][,i]))
+                                 }
+                                 map.2.output[[1]] <- map.2.output[[1]][map.2.output[[1]]$marker_1_chromosome != map.2.output[[1]]$marker_2_chromosome |
+                                                                        abs(map.2.output[[1]]$marker_1_position - map.2.output[[1]]$marker_2_position) > distance,]
+                             }
+
+                             if(ncol(map.2.output[[1]]) == 8){
+                                 map.2.output[[1]] <- map.2.output[[1]][map.2.output[[1]][,1] < map.2.output[[1]][,2],]
+                                 map.2.output[[1]] <- cbind(map.2.output[[1]],
+                                                            pval1_adj=round(-log10(p.adjust(10^-map.2.output[[1]][,3],method=method)),digits=2),
+                                                            pval2_adj=round(-log10(p.adjust(10^-map.2.output[[1]][,4],method=method)),digits=2),
+                                                            pvali_adj=round(-log10(p.adjust(10^-map.2.output[[1]][,5],method=method)),digits=2))
+                                 map.2.output[[1]] <- map.2.output[[1]][(map.2.output[[1]][,3] > -log10(pval.min) | map.2.output[[1]][,4] > -log10(pval.min) | map.2.output[[1]][,5] > -log10(pval.min)) &
+                                                                        !is.na(map.2.output[[1]][,3]) & !is.na(map.2.output[[1]][,4]) & !is.na(map.2.output[[1]][,5]),]
+                                 map.2.output[[1]] <- data.frame(cbind(map.2.output[[1]],
+                                                                       trait=rownames(map.2.output[[1]]),
+                                                                       marker_1_chromosome=as.character(unlist(map.2.output$Marker[map.2.output[[1]][,1],2])),
+                                                                       marker_1_position=map.2.output$Marker[map.2.output[[1]][,1],3],
+                                                                       marker_2_chromosome=as.character(unlist(map.2.output$Marker[map.2.output[[1]][,2],2])),
+                                                                       marker_2_position=map.2.output$Marker[map.2.output[[1]][,2],3]))
+                                 for(i in c(1:10,13,15)){
+                                     map.2.output[[1]][,i] <- as.numeric(as.character(unlist(map.2.output[[1]][,i])))
+                                 }
+                                 for(i in c(11,12,14)){
+                                     map.2.output[[1]][,i] <- as.character(unlist(map.2.output[[1]][,i]))
+                                 }
+                                 map.2.output[[1]] <- map.2.output[[1]][map.2.output[[1]]$marker_1_chromosome != map.2.output[[1]]$marker_2_chromosome |
+                                                                        abs(map.2.output[[1]]$marker_1_position - map.2.output[[1]]$marker_2_position) > distance,]
+                             }
+
+                             return(map.2.output)
+                            }
+
+
+
+
+
+
+
+
+
+
+
diff --git a/functions/QTL_mapping_more.R b/functions/QTL_mapping_more.R
new file mode 100644
index 0000000..ec38948
--- /dev/null
+++ b/functions/QTL_mapping_more.R
@@ -0,0 +1,32 @@
+###interactions between multiple peaks
+
+map.1.interactions <- function(eQTL.table.file,map1.output){
+                               if(missing(eQTL.table.file)){                                     stop("needs output from eQTL.table")}
+                               if(missing(map1.output)){                                         stop("needs the map1 output file")}
+
+                               traits <- eQTL.table.file$trait[duplicated(eQTL.table.file$trait)]
+
+                               output <- NULL
+                               for(i in 1:length(traits)){
+                                   traitrow <- which(rownames(map1.output$Trait) == traits[i])
+                                   maprows <- eQTL.table.file$qtl_peak[eQTL.table.file$trait == traits[i]]
+
+                                   lm_model_additive <- paste("lm(map1.output$Trait[",traitrow,",] ~ ",paste(paste("map1.output$Map[",maprows,",]",sep=""),collapse=" + "),")",sep="")
+                                   lm_model_interaction <- paste("lm(map1.output$Trait[",traitrow,",] ~ ",paste(paste("map1.output$Map[",maprows,",]",sep=""),collapse=" * "),")",sep="")
+
+                                   output_additive <- summary(eval(parse(text=lm_model_additive)))
+                                   output_interaction <- summary(eval(parse(text=lm_model_interaction)))
+
+                                   output <- rbind(output,cbind(trait=traits[i],model="additive",markers=rownames(output_additive[[4]])[-1],effect=output_additive[[4]][,1][-1],significance=output_additive[[4]][,4][-1],Rsquared=output_additive[[9]]))
+                                   output <- rbind(output,cbind(trait=traits[i],model="interaction",markers=rownames(output_interaction[[4]])[-1],effect=output_interaction[[4]][,1][-1],significance=output_interaction[[4]][,4][-1],Rsquared=output_interaction[[9]]))
+                               }
+                               output[,3] <- gsub("map1.output$Map[","",output[,3],fixed=TRUE)
+                               output[,3] <- gsub(", ]","",output[,3],fixed=TRUE)
+                                   rownames(output) <- c(1:nrow(output))
+                                   output <- data.frame(output)
+                                   for(i in c(1:3)){output[,i] <- as.character(unlist(output[,i]))}
+                                   for(i in c(4:6)){output[,i] <- as.numeric(as.character(unlist(output[,i])))}
+                                   output <- cbind(output,p_val=-log10(output[,5]))
+
+                               return(output)
+                              }
\ No newline at end of file
diff --git a/functions/QTL_simulation_permutation.R b/functions/QTL_simulation_permutation.R
new file mode 100644
index 0000000..217fc87
--- /dev/null
+++ b/functions/QTL_simulation_permutation.R
@@ -0,0 +1,256 @@
+    ###Permutation 1 marker mapping, genotypes per column, traits per row
+    map.1.perm <- function(trait.matrix,strain.map,strain.marker,n.perm){
+                           if(missing(trait.matrix)){                                      stop("specify trait.matrix, matrix with traits in rows and genotypes in columns")}
+                           if(missing(strain.map)){                                        stop("specify strain.map (genetic map)")}
+                           if(ncol(strain.map) !=  dim(as.matrix(trait.matrix))[2] &
+                              ncol(strain.map) !=  prod(dim(as.matrix(trait.matrix)))){    stop("number of strains is not equal to number of genotypes in map")}
+                           if(missing(strain.marker)){                                     strain.marker <- data.frame(cbind(name=1:dim(strain.map)[1],chromosome=NA,bp=1:dim(strain.map)[1]))
+                                                                                           rownames(strain.marker) <- 1:dim(strain.map)[1]}
+                           if(missing(n.perm)){                                            n.perm <- 1000}
+                           if(n.perm == 1 & dim(as.matrix(trait.matrix))[2] ==1){          stop("cannot conduct 1 permutation on 1 trait, set n.perm > 1")}
+
+
+                           NAs <- sum(is.na(trait.matrix))>0 | sum(is.na(strain.map))>0
+
+                           small <- FALSE
+                           if(dim(as.matrix(trait.matrix))[1] ==1){
+                               trait.matrix <- rbind(trait.matrix,1)
+                               small <- TRUE
+                           }
+
+                           trait.matrix <- trait.matrix[rep(1:nrow(trait.matrix),each=n.perm),]
+                           perm.data <- permutate.traits(trait.matrix)
+                           rownames(perm.data) <- paste(rownames(trait.matrix),1:n.perm,sep="_")
+
+                           if(small){
+                               perm.data <- perm.data[1:n.perm,]
+                           }
+
+                           eff.out <- matrix(NA,nrow(trait.matrix),nrow(strain.map))
+                           pval.out <- matrix(NA,nrow(trait.matrix),nrow(strain.map))
+
+                           for(i in 1:nrow(strain.map)){
+                               noseg <- length(unique(strain.map[i,])) ==1
+                               if(noseg){
+                                   output.tmp <- matrix(c(0,0),byrow=TRUE,ncol=2,nrow=nrow(perm.data))
+                               }
+                               if( i == 1 & !noseg){
+                                   if(!NAs){output.tmp <- fast.lm.1(perm.data,strain.map[i,])}
+                                   if(NAs){output.tmp <- lm.1(perm.data,strain.map[i,])}
+                               }
+                               if( i != 1 & sum(abs(strain.map[i-1,]-strain.map[i,]),na.rm=T) != 0 & !noseg){
+                                   if(!NAs){output.tmp <- fast.lm.1(perm.data,strain.map[i,])}
+                                   if(NAs){output.tmp <- lm.1(perm.data,strain.map[i,])}
+                               }
+                               if( i != 1 & sum(abs(strain.map[i-1,]-strain.map[i,]),na.rm=T) == 0 & !noseg){
+                                   output.tmp <- output.tmp
+                               }
+                               eff.out[,i] <- output.tmp[,2]
+                               pval.out[,i] <- output.tmp[,1]
+                           }
+
+                           colnames(eff.out) <- rownames(strain.marker)
+                           rownames(eff.out) <- rownames(trait.matrix)
+                           colnames(pval.out) <- rownames(strain.marker)
+                           rownames(pval.out) <- rownames(trait.matrix)
+                           output <- NULL; output <- as.list(output)
+                           output[[1]] <- round(pval.out,digits=2)
+                           output[[2]] <- round(eff.out,digits=3)
+                           output[[3]] <- perm.data
+                           output[[4]] <- strain.map
+                           output[[5]] <- strain.marker
+                           names(output) <- c("LOD","Effect","Trait_perm","Map","Marker")
+                           return(output)
+                          }
+
+    ###Based on Benjamini and Yakuteili multiple testing under dependency
+    map.perm.fdr <- function(map1.output,filenames.perm,FDR_dir,q.value,small){
+                             if(missing(map1.output)){        stop("Missing map1.output ([[LOD]],[[Effect]],[[Trait]],[[Map]],[[Marker]])")}
+                             if(missing(filenames.perm)){     stop("Need a vector with filenames for permutation")}
+                             if(missing(FDR_dir)){            FDR_dir <- getwd()}
+                             if(missing(q.value)){            q.value <- 0.025}
+                             if(missing(small)){              small <- FALSE}
+
+                             output <- as.list(NULL)
+
+                             output[[2]] <- matrix(0,ncol=length(filenames.perm),nrow=5000)
+                             rownames(output[[2]]) <- seq(0.1,500,by=0.1)
+                             colnames(output[[2]]) <- filenames.perm
+
+                             ###in case of one trait
+                             if(small){
+                                 if(length(filenames.perm) > 1){
+                                     stop("needs one permutatation file, with n permutations")
+                                 }
+                                 tmp <- load(file=paste(FDR_dir,filenames.perm,sep=""))
+                                 ###No NA's
+                                 tmp <- get(tmp)[[1]]
+                                 tmp[is.na(tmp)] <- 0.1
+                                 
+                                 ###no Non-traits
+                                 tmp <- tmp[rownames(tmp) != "",]
+                                 output[[2]] <- apply(tmp,1,max,na.rm=T)
+                                 
+                                 if((length(output[[2]]) * q.value) < 10){
+                                      warning(paste("inadvisable to calculate q=",q.value,"based on ",length(output[[2]])," permutations. Increase to at least ", ceiling(10/q.value)))
+                                 }
+                                 
+                                 ###take the q-cutoff, *10 is to be consistent with eQTL methodology (is per 0.1)
+                                 output[[1]] <- rev(sort(output[[2]]))[ceiling(q.value*length(output[[2]]))]*10
+                                 
+                                 ###again, to be consistent; 
+                                 output[[3]] <- mean(output[[2]],na.rm = TRUE)
+                                 
+                                 ###real data
+                                 output[[4]] <- max(map1.output$LOD,na.rm=TRUE)
+                                 
+                                 names(output) <- c("Significance_threshold","False_discoveries","False_discoveries_average","Real_discoveries")
+                             }
+                             if(!small){
+                                 for(i in 1:length(filenames.perm)){
+                                     tmp <- load(file=paste(FDR_dir,filenames.perm[i],sep=""))
+                                     ###No NA's
+                                     tmp <- get(tmp)[[1]]
+                                     tmp[is.na(tmp)] <- 0.1
+    
+                                     tmp <- table(apply(round(tmp,digits=1),1,max,na.rm=T))
+    
+                                     output[[2]][(as.numeric(names(tmp))*10),i] <- tmp
+                                     output[[2]][,i] <- rev(cumsum(rev(output[[2]][,i])))
+                                 }
+                                 ###Take the average over permutations
+                                 output[[3]] <- apply(output[[2]],1,mean)
+    
+                                 ###Calculate the real discoveries, no NA's
+                                 tmp <- map1.output$LOD
+                                 tmp[is.na(tmp)] <- 0.1
+    
+                                 RDS.tmp <- table(round(apply(tmp,1,max,na.rm=T),digits=1))
+                                 RDS <- rep(0,times=5000); RDS[(as.numeric(names(RDS.tmp))*10)] <- RDS.tmp
+                                 RDS <- rev(cumsum(rev(RDS)))
+    
+                                 output[[4]] <- RDS
+    
+                                 output[[1]] <- which(round(((dim(map1.output$LOD)[1]-RDS)/dim(map1.output$LOD)[1])*q.value*log10(dim(map1.output$LOD)[1]),digits=3)-round(output[[3]]/output[[4]],digits=3)>0)[1]
+                                 names(output) <- c("Significance_threshold","False_discoveries","False_discoveries_average","Real_discoveries")
+                             }
+                             
+                             ###Return
+                             return(output)
+                            }
+
+###auxiliary function for data simulation    
+simulate.data.per.marker <- function(strain.map,strain.marker,n_per_marker,sigma_peak,sigma_error){
+                                    sim.map.file <- list()
+                                    
+                                    sim.map.file[[2]] <- data.matrix(strain.map)
+                                        types <- unique(as.numeric(as.character(unlist(sim.map.file[[2]]))))
+                                        sim.map.file[[2]] <- sim.map.file[[2]][rep(1:nrow(sim.map.file[[2]]),each=n_per_marker),]
+                                    
+                                    sim.map.file[[1]] <- sim.map.file[[2]]
+                                        sim.map.file[[1]][sim.map.file[[2]]==types[1]] <- sigma_peak
+                                        sim.map.file[[1]][sim.map.file[[2]]==types[2]] <- 0
+                                        sim.map.file[[1]] <- sim.map.file[[1]] + rnorm(n=length(sim.map.file[[1]]),mean=0,sd=sigma_error)
+                                        rownames(sim.map.file[[1]]) <- paste(rownames(sim.map.file[[1]]),rep(1:n_per_marker,times=nrow(sim.map.file[[2]])/n_per_marker),sep="_")
+                                    
+                                    sim.map.file[[2]] <- sim.map.file[[2]][seq(1,nrow(sim.map.file[[2]]),by=n_per_marker),]
+                                    
+                                    sim.map.file[[3]] <- strain.marker
+                                    
+                                    return(sim.map.file)
+                                   }    
+    
+###Conducts mapping simulations to investigate power
+map.1.simulation <- function(strain.map,strain.marker,n_per_marker,sigma_peak,sigma_error,threshold){
+                            if(missing(strain.map)){                                        stop("specify strain.map (genetic map)")}
+                            if(missing(strain.marker)){                                     strain.marker <- data.frame(cbind(name=1:dim(strain.map)[1],chromosome=NA,bp=1:dim(strain.map)[1]))
+                                                                                            rownames(strain.marker) <- 1:dim(strain.map)[1]}
+                            if(missing(n_per_marker)){                                      n_per_marker <-10}
+                            if(missing(sigma_peak)){                                        sigma_peak <- c(1,1.15,1.3,1.47,1.64,1.82,2,2.2,2.47,2.73,3.05,3.45,4)}
+                            if(missing(sigma_error)){                                       sigma_error <- 1}
+                            if(missing(threshold)){                                         threshold <- c(3.5)}
+                            ###1 sigma difference on 1 sigma noise is 20% of variance
+                            ###2 sigma difference on 1 sigma noise is 50% of variance
+                            ###3 sigma difference on 1 sigma noise is 69% of variance
+                            ###4 sigma difference on 1 sigma noise is 80% of variance; summary(lm(c(rnorm(10000,0,1),rnorm(10000,4,1))~rep(c(-1,1),each=10000)))
+                            ###cbind(var_expl=seq(5,80,by=5),sigma=c(0.46,0.67,0.84,1,1.15,1.3,1.47,1.64,1.82,2,2.2,2.47,2.73,3.05,3.45,4))
+    
+                            out <- NULL
+                            for(i in sigma_peak){
+                                ###Generate peaked data with random error
+                                    sim.map.file <- simulate.data.per.marker(strain.map,strain.marker,n_per_marker,sigma_peak=i,sigma_error)
+                                    sim.emp.file <- simulate.data.per.marker(strain.map,strain.marker,n_per_marker,sigma_peak=0,sigma_error)
+    
+                                ###Map it
+                                    sim.map.file <- map.1(sim.map.file[[1]],sim.map.file[[2]],sim.map.file[[3]])
+                                    
+                                    loc_peak <- as.numeric(as.character(unlist(apply(sim.map.file$LOD,1,which.max))))
+                                    lod_peak <- as.numeric(as.character(unlist(apply(sim.map.file$LOD,1,max,na.rm=TRUE))))
+                                    eff_peak <- t(sim.map.file$Effect)[(ncol(sim.map.file$Effect)*(0:(nrow(sim.map.file$Effect)-1)))+loc_peak]
+    
+                                    loc_sim <- rep(1:nrow(sim.map.file$Map),each=n_per_marker)
+                                    lod_sim <- t(sim.map.file$LOD)[(ncol(sim.map.file$LOD)*(0:(nrow(sim.map.file$LOD)-1)))+loc_sim]
+                                    eff_sim <- t(sim.map.file$Effect)[(ncol(sim.map.file$Effect)*(0:(nrow(sim.map.file$Effect)-1)))+loc_sim]
+    
+                                    out.sim <- NULL
+                                        out.sim <- c(out.sim,detected_true=sum(sim.map.file$Marker[loc_peak,2] == sim.map.file$Marker[loc_sim,2] & lod_peak-1.5 < lod_sim & lod_peak >= threshold))
+                                        out.sim <- c(out.sim,detected_false=(sum(lod_peak >= threshold) - sum(sim.map.file$Marker[loc_peak,2] == sim.map.file$Marker[loc_sim,2] & lod_peak-1.5 < lod_sim & lod_peak >= threshold)))
+                                        out.sim <- c(out.sim,detected_not = sum(lod_peak < threshold))
+                                            selc <- sim.map.file$Marker[loc_peak,2] == sim.map.file$Marker[loc_sim,2] & lod_peak-1.5 < lod_sim & lod_peak >= threshold
+                                        out.sim <- c(out.sim,detected_true_eff_est=quantile(abs(eff_peak[selc])/(i/2)))
+                                        out.sim <- c(out.sim,detected_true_loc_est=quantile(abs(sim.map.file$Marker[loc_peak,3]-sim.map.file$Marker[loc_sim,3])[selc]))
+    
+                                ###Map False file
+                                    sim.map.file <- map.1(sim.emp.file[[1]],sim.emp.file[[2]],sim.emp.file[[3]])
+    
+                                    loc_peak <- as.numeric(as.character(unlist(apply(sim.map.file$LOD,1,which.max))))
+                                    lod_peak <- as.numeric(as.character(unlist(apply(sim.map.file$LOD,1,max,na.rm=TRUE))))
+                                    eff_peak <- t(sim.map.file$Effect)[(ncol(sim.map.file$Effect)*(0:(nrow(sim.map.file$Effect)-1)))+loc_peak]
+    
+                                    loc_sim <- rep(1:nrow(sim.map.file$Map),each=n_per_marker)
+                                    lod_sim <- t(sim.map.file$LOD)[(ncol(sim.map.file$LOD)*(0:(nrow(sim.map.file$LOD)-1)))+loc_sim]
+                                    eff_sim <- t(sim.map.file$Effect)[(ncol(sim.map.file$Effect)*(0:(nrow(sim.map.file$Effect)-1)))+loc_sim]
+    
+                                    out.emp <- NULL
+                                        out.emp <- c(out.emp,detected_true=0)
+                                        out.emp <- c(out.emp,detected_false=sum(lod_peak >= threshold))
+                                        out.emp <- c(out.emp,detected_not = sum(lod_peak < threshold))
+    
+                                        out.emp <- c(out.emp,detected_true_eff_est=quantile(rep(NA,times=100),na.rm=T))
+                                        out.emp <- c(out.emp,detected_true_loc_est=quantile(rep(NA,times=100),na.rm=T))
+                                ###Combine and summarize
+                                    out.sim <- data.frame(cbind(name=c(paste("peak_sim_",i,sep=""),paste("noise_sim_",sigma_error,sep="")),
+                                                                var_explained=c(round(summary(lm(c(rnorm(100000,0,sigma_error),rnorm(100000,i,sigma_error))~rep(c(-1,1),each=100000)))$adj.r.squared,digits=2),round(summary(lm(c(rnorm(100000,0,sigma_error),rnorm(100000,0,sigma_error))~rep(c(-1,1),each=100000)))$adj.r.squared,digits=2)),
+                                                                rbind(out.sim,out.emp)))
+    
+                                    out <- rbind(out,out.sim)
+                            }
+                            
+                            ###Make sure numbers are numeric and characters are characteristic                            
+                            for(i in 2:ncol(out)){
+                                out[,i] <- as.numeric(as.character(unlist(out[,i])))
+                            }
+                            out[,1] <- as.character(unlist(out[,1]))
+
+
+                            ###summarize all noise simulations
+                            out[max(grep("noise_sim",out[,1])),-1] <- apply(out[grep("noise_sim",out[,1]),-1],2,sum)
+                            out <- out[c(grep("peak_sim",out[,1]),max(grep("noise_sim",out[,1]))),]
+                            
+                            ##Add percentage detected & percentage false
+                            output <- cbind(out,
+                                            percentage_detected=round(out$detected_true/(out$detected_true+out$detected_false+out$detected_not),digits=3)*100,
+                                            percentage_false=round(out$detected_false/(out$detected_true+out$detected_false+out$detected_not),digits=3)*100,
+                                            type=unlist(strsplit(out$name,split="_"))[seq(1,nrow(out)*3,by=3)],
+                                            sigma=unlist(strsplit(out$name,split="_"))[seq(3,nrow(out)*3,by=3)])
+                                                            
+                            output <- output[,c(18,19,2,3:5,16,17,6:15)]
+                            colnames(output)[grepl("_est.",colnames(output))] <- paste("quantile_",unlist(strsplit(colnames(output)[grepl("_est.",colnames(output))],split="_est."))[seq(2,2*sum(grepl("_est.",colnames(output))),by=2)],sep="")
+                            colnames(output) <- gsub("\\.","",colnames(output))
+                            
+                            colnames(output) <- paste(c(rep("Simulation",times=3),rep("QTL_detection",times=5),rep("Effect_size_estimation_(quantiles)",times=5),rep("QTL-true_peak_location_distance_(quantiles)",times=5)),colnames(output),sep=".")
+                            
+
+                            return(output)
+                           }
+
diff --git a/functions/eQTL_based_analyses.R b/functions/eQTL_based_analyses.R
new file mode 100644
index 0000000..60272fb
--- /dev/null
+++ b/functions/eQTL_based_analyses.R
@@ -0,0 +1,126 @@
+
+
+    ################################################################################################################################################################
+    ###eQTL_correlator(trait.dataframe,strain.map,strain.marker,eQTL.table,ref.genotype,method,n.genes,interval)
+    ###   predicts the genotype based on eQTL information, it returns a list with correlations.
+    ###   obligatory input
+    ###     eQTL.table.output, output from the eQTL.table function
+    ###     trait.dataframe, preferrably data centered to the parental mean, otherwise z.scores or ratio with the mean of the entire experiment could be used
+    ###   optional input
+    ###     ref.genotype, standard is "1"
+    ###     n.genes, standard is 20
+    ###     method, standard is "per.genes", other option is "interval"
+    ###     interval, standard is 1e6
+    
+    eQTL.correlator <- function(trait.dataframe,eQTL.table,ref.genotype,method,n.genes,interval){
+                                if(missing(trait.dataframe)){                                   stop("specify trait.dataframe, columns: trait, strain, ratio, sample")}
+                                if(sum(c("trait","strain","ratio","sample") %in% colnames(trait.dataframe)) != 4){
+                                                                                                stop("specify trait.dataframe, columns: trait, strain, ratio, sample")}
+                                if(missing(eQTL.table)){                                        stop("specify eQTL.table")}
+                                ###Parameters
+                                if(missing(ref.genotype)){                                      ref.genotype <- 1}
+                                if(missing(method)){                                            method <- "per.genes"}
+                                    if(missing(n.genes) & method=="per.genes"){                 n.genes <- 20}
+                                    if(missing(interval)& method=="interval"){                  interval <- 1e6}
+                                if(method %in% c("per.genes","interval") ==FALSE){              stop("choose a method from 'per.genes' and 'interval'")}
+    
+    
+                                ###Take only one type of eQTL
+                                eQTL.table <- eQTL.table[eQTL.table$qtl_type == unique(eQTL.table$qtl_type)[1],
+                                                         colnames(eQTL.table) %in% c("trait","qtl_chromosome","qtl_bp","qtl_marker","qtl_significance","qtl_effect","gene_chromosome","gene_bp","gene_WBID")]
+                                eQTL.table <- merge(eQTL.table,trait.dataframe)
+    
+    
+                                ###calculate correlation per n.genes, 20 works pretty well
+                                if(method =="per.genes"){
+                                    out <- group_by(eQTL.table,strain,gene_chromosome,sample) %>%
+                                           mutate(order_gene=ceiling(rank(gene_bp)/n.genes)) %>%
+                                           as.data.frame() %>%
+                                           group_by(strain,gene_chromosome,order_gene,sample) %>%
+                                           mutate(n.cor=length(qtl_effect)) %>%
+                                           as.data.frame() %>%
+                                           mutate(order_gene = ifelse(n.cor<n.genes/2,order_gene-1,order_gene)) %>%
+                                           group_by(strain,gene_chromosome,order_gene,sample) %>%
+                                           summarise(correlation = cor(x=ratio,y=qtl_effect*ref.genotype,use="complete.obs"),
+                                                     correlation_significance=cor.test(x=ratio,y=qtl_effect*ref.genotype,na.action=na.omit())$p.value,n.cor=length(qtl_effect),
+                                                     location_left=min(gene_bp)/1e6,location_median=median(gene_bp)/1e6,location_right=max(gene_bp)/1e6) %>%
+                                           as.data.frame()
+                                }
+    
+    
+                                if(method =="interval"){
+                                    out <- mutate(eQTL.table.output,order_gene=ceiling(gene_bp/interval)) %>%
+                                           group_by(strain,gene_chromosome,order_gene,sample) %>%
+                                           mutate(n.cor=length(qtl_effect)) %>%
+                                           as.data.frame() %>%
+                                           mutate(order_gene = ifelse(n.cor<10,order_gene-1,order_gene)) %>%
+                                           group_by(strain,gene_chromosome,order_gene,sample) %>%
+                                           mutate(n.cor=length(qtl_effect)) %>%
+                                           as.data.frame() %>%
+                                           mutate(order_gene = ifelse(n.cor<10,order_gene-1,order_gene)) %>%
+                                           group_by(strain,gene_chromosome,order_gene,sample) %>%
+                                           summarise(correlation = cor(x=ratio,y=qtl_effect*ref.genotype,use="complete.obs"),
+                                                     correlation_significance=cor.test(x=ratio,y=qtl_effect*ref.genotype,na.action=na.omit())$p.value,n.cor=length(qtl_effect),
+                                                     location_left=min(gene_bp)/1e6,location_median=median(gene_bp)/1e6,location_right=max(gene_bp)/1e6) %>%
+                                           as.data.frame()
+                                }
+    
+                                return(out)
+                               }
+
+    
+    ###     strain.map, map of the investigated strains
+    ###     strain.marker, marker file of the investigated strains    
+        
+    plot.eQTL.correlation <- function(eQTL.correlator.output,strain.map,strain.marker,filename){
+                                      if(missing(eQTL.correlator.output)){                            stop("provide output from eQTL.correlator function")}
+                                      if(missing(strain.map)){                                        stop("specify strain.map (genetic map)")}
+                                      if(missing(strain.marker)){                                     strain.marker <- data.frame(cbind(name=1:dim(strain.map)[1],chromosome=NA,bp=1:dim(strain.map)[1]))
+                                                                                                      rownames(strain.marker) <- 1:dim(strain.map)[1]}
+                                      if(missing(filename )){                                         filename <- "eQTL_genotype"}
+
+                                      pdf(paste(filename,".pdf",sep="_"),width=18,height=8)
+                                          for(i in 1:length(unique(eQTL.correlator.output$sample))){
+                                              plot.data <- eQTL.correlator.output[eQTL.correlator.output$sample==unique(eQTL.correlator.output$sample)[i],]
+
+                                              plot.eQTL <- ggplot(plot.data,aes(x=location_median,y=correlation,colour=n.cor)) +
+                                                           geom_point(aes(alpha=0.5)) + geom_smooth(method="loess",span=0.5,se=TRUE) + geom_segment(aes(x=location_left,y=correlation,xend=location_right,yend=correlation,alpha=0.5)) +
+                                                           facet_grid(.~gene_chromosome, scales="free") + geom_hline(yintercept=0) + presentation +
+                                                           labs(x="Marker location (Mb)",y=paste("Correlation N2 and",tolower(unlist(unique(plot.data$strain))[1]))) + ylim(-2,2) +
+                                                           ggtitle(unique(plot.data$sample))
+
+                                              plot.data <- data.frame(cbind(name=strain.marker[,1],
+                                                                            chromosome=strain.marker[,2],
+                                                                            bp=strain.marker[,3],
+                                                                            current=strain.map[,tolower(colnames(strain.map))==tolower(unlist(unique(plot.data$strain))[1])]))
+                                                  plot.data[,1] <- as.character(unlist(plot.data[,1]))
+                                                  plot.data[,2] <- as.character(unlist(plot.data[,2]))
+                                                  plot.data[,3] <- as.numeric(as.character(unlist(plot.data[,3])))
+                                                  plot.data[,4] <- as.numeric(as.character(unlist(plot.data[,4])))
+
+                                              plot.mapz <- ggplot(plot.data,aes(x=bp/1e6,y=current,colour=current)) +
+                                                           geom_point() + geom_line() + facet_grid(.~chromosome, scales="free") +
+                                                           geom_hline(yintercept=0) + presentation +
+                                                           labs(x="Marker location (Mb)",y=paste("Map",tolower(unlist(unique(plot.data$strain))[1]))) + ylim(-2,2)
+
+                                              grid.arrange(plot.eQTL , plot.mapz,
+                                                                 ncol=1,heights=c(4,2))
+
+                                           }
+                                       dev.off()
+                                      }
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/functions/emma_functions.r b/functions/emma_functions.r
new file mode 100644
index 0000000..3cfc309
--- /dev/null
+++ b/functions/emma_functions.r
@@ -0,0 +1,515 @@
+emma.REML.t <- function (ys, xs, K, Z = NULL, X0 = NULL, ngrids = 100, llim = -10,ulim = 10, esp = 1e-10, ponly = FALSE){
+    if (is.null(dim(ys)) || ncol(ys) == 1) {
+        ys <- matrix(ys, 1, length(ys))
+    }
+    if (is.null(dim(xs)) || ncol(xs) == 1) {
+        xs <- matrix(xs, 1, length(xs))
+    }
+    if (is.null(X0)) {
+        X0 <- matrix(1, ncol(ys), 1)
+    }
+    g <- nrow(ys)
+    n <- ncol(ys)
+    m <- nrow(xs)
+    t <- ncol(xs)
+    q0 <- ncol(X0)
+    q1 <- q0 + 1
+    stopifnot(nrow(K) == t)
+    stopifnot(ncol(K) == t)
+    stopifnot(nrow(X0) == n)
+    if (!ponly) {
+        REMLs <- matrix(nrow = m, ncol = g)
+        vgs <- matrix(nrow = m, ncol = g)
+        ves <- matrix(nrow = m, ncol = g)
+    }
+    dfs <- matrix(nrow = m, ncol = g)
+    stats <- matrix(nrow = m, ncol = g)
+    ps <- matrix(nrow = m, ncol = g)
+    if (sum(is.na(ys)) == 0) {
+        eig.L <- emma.eigen.L(Z, K)
+        x.prev <- vector(length = 0)
+        for (i in 1:m) {
+            vids <- !is.na(xs[i, ])
+            nv <- sum(vids)
+            xv <- xs[i, vids]
+            if ((mean(xv) <= 0) || (mean(xv) >= 1)) {
+                if (!ponly) {
+                  vgs[i, ] <- rep(NA, g)
+                  ves[i, ] <- rep(NA, g)
+                  dfs[i, ] <- rep(NA, g)
+                  REMLs[i, ] <- rep(NA, g)
+                  stats[i, ] <- rep(NA, g)
+                }
+                ps[i, ] = rep(1, g)
+            }
+            else if (identical(x.prev, xv)) {
+                if (!ponly) {
+                  vgs[i, ] <- vgs[i - 1, ]
+                  ves[i, ] <- ves[i - 1, ]
+                  dfs[i, ] <- dfs[i - 1, ]
+                  REMLs[i, ] <- REMLs[i - 1, ]
+                  stats[i, ] <- stats[i - 1, ]
+                }
+                ps[i, ] <- ps[i - 1, ]
+            }
+            else {
+                if (is.null(Z)) {
+                  X <- cbind(X0[vids, , drop = FALSE], xs[i,
+                    vids])
+                  eig.R1 = emma.eigen.R.wo.Z(K[vids, vids], X)
+                }
+                else {
+                  vrows <- as.logical(rowSums(Z[, vids]))
+                  X <- cbind(X0[vrows, , drop = FALSE], Z[vrows,
+                    vids, drop = FALSE] %*% t(xs[i, vids, drop = FALSE]))
+                  eig.R1 = emma.eigen.R.w.Z(Z[vrows, vids], K[vids,
+                    vids], X)
+                }
+                for (j in 1:g) {
+                  if (nv == t) {
+                    REMLE <- emma.REMLE(ys[j, ], X, K, Z, ngrids,
+                      llim, ulim, esp, eig.R1)
+                    if (is.null(Z)) {
+                      U <- eig.L$vectors * matrix(sqrt(1/(eig.L$values +
+                        REMLE$delta)), t, t, byrow = TRUE)
+                      dfs[i, j] <- nv - q1
+                    }
+                    else {
+                      U <- eig.L$vectors * matrix(c(sqrt(1/(eig.L$values +
+                        REMLE$delta)), rep(sqrt(1/REMLE$delta),
+                        n - t)), n, n, byrow = TRUE)
+                      dfs[i, j] <- n - q1
+                    }
+                    yt <- crossprod(U, ys[j, ])
+                    Xt <- crossprod(U, X)
+                    iXX <- solve(crossprod(Xt, Xt))
+                    beta <- iXX %*% crossprod(Xt, yt)
+                    if (!ponly) {
+                      vgs[i, j] <- REMLE$vg
+                      ves[i, j] <- REMLE$ve
+                      REMLs[i, j] <- REMLE$REML
+                    }
+                    stats[i, j] <- beta[q1]/sqrt(iXX[q1, q1] *
+                      REMLE$vg)
+                  }
+                  else {
+                    if (is.null(Z)) {
+                      eig.L0 <- emma.eigen.L.wo.Z(K[vids, vids])
+                      nr <- sum(vids)
+                      yv <- ys[j, vids]
+                      REMLE <- emma.REMLE(yv, X, K[vids, vids,
+                        drop = FALSE], NULL, ngrids, llim, ulim,
+                        esp, eig.R1)
+                      U <- eig.L0$vectors * matrix(sqrt(1/(eig.L0$values +
+                        REMLE$delta)), nr, nr, byrow = TRUE)
+                      dfs[i, j] <- nr - q1
+                    }
+                    else {
+                      eig.L0 <- emma.eigen.L.w.Z(Z[vrows, vids,
+                        drop = FALSE], K[vids, vids])
+                      yv <- ys[j, vrows]
+                      nr <- sum(vrows)
+                      tv <- sum(vids)
+                      REMLE <- emma.REMLE(yv, X, K[vids, vids,
+                        drop = FALSE], Z[vrows, vids, drop = FALSE],
+                        ngrids, llim, ulim, esp, eig.R1)
+                      U <- eig.L0$vectors * matrix(c(sqrt(1/(eig.L0$values +
+                        REMLE$delta)), rep(sqrt(1/REMLE$delta),
+                        nr - tv)), nr, nr, byrow = TRUE)
+                      dfs[i, j] <- nr - q1
+                    }
+                    yt <- crossprod(U, yv)
+                    Xt <- crossprod(U, X)
+                    iXX <- solve(crossprod(Xt, Xt))
+                    beta <- iXX %*% crossprod(Xt, yt)
+                    if (!ponly) {
+                      vgs[i, j] <- REMLE$vg
+                      ves[i, j] <- REMLE$ve
+                      REMLs[i, j] <- REMLE$REML
+                    }
+                    stats[i, j] <- beta[q1]/sqrt(iXX[q1, q1] *
+                      REMLE$vg)
+                  }
+                }
+                ps[i, ] <- 2 * pt(abs(stats[i, ]), dfs[i, ],
+                  lower.tail = FALSE)
+            }
+        }
+    }
+    else {
+        eig.L <- emma.eigen.L(Z, K)
+        eig.R0 <- emma.eigen.R(Z, K, X0)
+        x.prev <- vector(length = 0)
+        for (i in 1:m) {
+            vids <- !is.na(xs[i, ])
+            nv <- sum(vids)
+            xv <- xs[i, vids]
+            if ((mean(xv) <= 0) || (mean(xv) >= 1)) {
+                if (!ponly) {
+                  vgs[i, ] <- rep(NA, g)
+                  ves[i, ] <- rep(NA, g)
+                  REMLs[i, ] <- rep(NA, g)
+                  dfs[i, ] <- rep(NA, g)
+                }
+                ps[i, ] = rep(1, g)
+            }
+            else if (identical(x.prev, xv)) {
+                if (!ponly) {
+                  stats[i, ] <- stats[i - 1, ]
+                  vgs[i, ] <- vgs[i - 1, ]
+                  ves[i, ] <- ves[i - 1, ]
+                  REMLs[i, ] <- REMLs[i - 1, ]
+                  dfs[i, ] <- dfs[i - 1, ]
+                }
+                ps[i, ] = ps[i - 1, ]
+            }
+            else {
+                if (is.null(Z)) {
+                  X <- cbind(X0, xs[i, ])
+                  if (nv == t) {
+                    eig.R1 = emma.eigen.R.wo.Z(K, X)
+                  }
+                }
+                else {
+                  vrows <- as.logical(rowSums(Z[, vids, drop = FALSE]))
+                  X <- cbind(X0, Z[, vids, drop = FALSE] %*%
+                    t(xs[i, vids, drop = FALSE]))
+                  if (nv == t) {
+                    eig.R1 = emma.eigen.R.w.Z(Z, K, X)
+                  }
+                }
+                for (j in 1:g) {
+                  vrows <- !is.na(ys[j, ])
+                  if (nv == t) {
+                    yv <- ys[j, vrows]
+                    nr <- sum(vrows)
+                    if (is.null(Z)) {
+                      if (nr == n) {
+                        REMLE <- emma.REMLE(yv, X, K, NULL, ngrids,
+                          llim, ulim, esp, eig.R1)
+                        U <- eig.L$vectors * matrix(sqrt(1/(eig.L$values +
+                          REMLE$delta)), n, n, byrow = TRUE)
+                      }
+                      else {
+                        eig.L0 <- emma.eigen.L.wo.Z(K[vrows,
+                          vrows, drop = FALSE])
+                        REMLE <- emma.REMLE(yv, X[vrows, , drop = FALSE],
+                          K[vrows, vrows, drop = FALSE], NULL,
+                          ngrids, llim, ulim, esp)
+                        U <- eig.L0$vectors * matrix(sqrt(1/(eig.L0$values +
+                          REMLE$delta)), nr, nr, byrow = TRUE)
+                      }
+                      dfs[i, j] <- nr - q1
+                    }
+                    else {
+                      if (nr == n) {
+                        REMLE <- emma.REMLE(yv, X, K, Z, ngrids,
+                          llim, ulim, esp, eig.R1)
+                        U <- eig.L$vectors * matrix(c(sqrt(1/(eig.L$values +
+                          REMLE$delta)), rep(sqrt(1/REMLE$delta),
+                          n - t)), n, n, byrow = TRUE)
+                      }
+                      else {
+                        vtids <- as.logical(colSums(Z[vrows,
+                          , drop = FALSE]))
+                        eig.L0 <- emma.eigen.L.w.Z(Z[vrows, vtids,
+                          drop = FALSE], K[vtids, vtids, drop = FALSE])
+                        REMLE <- emma.REMLE(yv, X[vrows, , drop = FALSE],
+                          K[vtids, vtids, drop = FALSE], Z[vrows,
+                            vtids, drop = FALSE], ngrids, llim,
+                          ulim, esp)
+                        U <- eig.L0$vectors * matrix(c(sqrt(1/(eig.L0$values +
+                          REMLE$delta)), rep(sqrt(1/REMLE$delta),
+                          nr - sum(vtids))), nr, nr, byrow = TRUE)
+                      }
+                      dfs[i, j] <- nr - q1
+                    }
+                    yt <- crossprod(U, yv)
+                    Xt <- crossprod(U, X[vrows, , drop = FALSE])
+                    iXX <- solve(crossprod(Xt, Xt))
+                    beta <- iXX %*% crossprod(Xt, yt)
+                    if (!ponly) {
+                      vgs[i, j] <- REMLE$vg
+                      ves[i, j] <- REMLE$ve
+                      REMLs[i, j] <- REMLE$REML
+                    }
+                    stats[i, j] <- beta[q1]/sqrt(iXX[q1, q1] *
+                      REMLE$vg)
+                  }
+                  else {
+                    if (is.null(Z)) {
+                      vtids <- vrows & vids
+                      eig.L0 <- emma.eigen.L.wo.Z(K[vtids, vtids,
+                        drop = FALSE])
+                      yv <- ys[j, vtids]
+                      nr <- sum(vtids)
+                      REMLE <- emma.REMLE(yv, X[vtids, , drop = FALSE],
+                        K[vtids, vtids, drop = FALSE], NULL,
+                        ngrids, llim, ulim, esp)
+                      U <- eig.L0$vectors * matrix(sqrt(1/(eig.L0$values +
+                        REMLE$delta)), nr, nr, byrow = TRUE)
+                      Xt <- crossprod(U, X[vtids, , drop = FALSE])
+                      dfs[i, j] <- nr - q1
+                    }
+                    else {
+                      vtids <- as.logical(colSums(Z[vrows, ,
+                        drop = FALSE])) & vids
+                      vtrows <- vrows & as.logical(rowSums(Z[,
+                        vids, drop = FALSE]))
+                      eig.L0 <- emma.eigen.L.w.Z(Z[vtrows, vtids,
+                        drop = FALSE], K[vtids, vtids, drop = FALSE])
+                      yv <- ys[j, vtrows]
+                      nr <- sum(vtrows)
+                      REMLE <- emma.REMLE(yv, X[vtrows, , drop = FALSE],
+                        K[vtids, vtids, drop = FALSE], Z[vtrows,
+                          vtids, drop = FALSE], ngrids, llim,
+                        ulim, esp)
+                      U <- eig.L0$vectors * matrix(c(sqrt(1/(eig.L0$values +
+                        REMLE$delta)), rep(sqrt(1/REMLE$delta),
+                        nr - sum(vtids))), nr, nr, byrow = TRUE)
+                      Xt <- crossprod(U, X[vtrows, , drop = FALSE])
+                      dfs[i, j] <- nr - q1
+                    }
+                    yt <- crossprod(U, yv)
+                    iXX <- solve(crossprod(Xt, Xt))
+                    beta <- iXX %*% crossprod(Xt, yt)
+                    if (!ponly) {
+                      vgs[i, j] <- REMLE$vg
+                      ves[i, j] <- REMLE$ve
+                      REMLs[i, j] <- REMLE$REML
+                    }
+                    stats[i, j] <- beta[q1]/sqrt(iXX[q1, q1] *
+                      REMLE$vg)
+                  }
+                }
+                ps[i, ] <- 2 * pt(abs(stats[i, ]), dfs[i, ],
+                  lower.tail = FALSE)
+            }
+        }
+    }
+    if (ponly) {
+        return(ps)
+    }
+    else {
+        return(list(ps = ps, REMLs = REMLs, stats = stats, dfs = dfs,
+            vgs = vgs, ves = ves))
+    }
+}
+
+emma.kinship <- function (snps, method = "additive", use = "all")
+{
+    n0 <- sum(snps == 0, na.rm = TRUE)
+    nh <- sum(snps == 0.5, na.rm = TRUE)
+    n1 <- sum(snps == 1, na.rm = TRUE)
+    nNA <- sum(is.na(snps))
+    stopifnot(n0 + nh + n1 + nNA == length(snps))
+    if (method == "dominant") {
+        flags <- matrix(as.double(rowMeans(snps, na.rm = TRUE) >
+            0.5), nrow(snps), ncol(snps))
+        snps[!is.na(snps) & (snps == 0.5)] <- flags[!is.na(snps) &
+            (snps == 0.5)]
+    }
+    else if (method == "recessive") {
+        flags <- matrix(as.double(rowMeans(snps, na.rm = TRUE) <
+            0.5), nrow(snps), ncol(snps))
+        snps[!is.na(snps) & (snps == 0.5)] <- flags[!is.na(snps) &
+            (snps == 0.5)]
+    }
+    else if ((method == "additive") && (nh > 0)) {
+        dsnps <- snps
+        rsnps <- snps
+        flags <- matrix(as.double(rowMeans(snps, na.rm = TRUE) >
+            0.5), nrow(snps), ncol(snps))
+        dsnps[!is.na(snps) & (snps == 0.5)] <- flags[!is.na(snps) &
+            (snps == 0.5)]
+        flags <- matrix(as.double(rowMeans(snps, na.rm = TRUE) <
+            0.5), nrow(snps), ncol(snps))
+        rsnps[!is.na(snps) & (snps == 0.5)] <- flags[!is.na(snps) &
+            (snps == 0.5)]
+        snps <- rbind(dsnps, rsnps)
+    }
+    if (use == "all") {
+        mafs <- matrix(rowMeans(snps, na.rm = TRUE), nrow(snps),
+            ncol(snps))
+        snps[is.na(snps)] <- mafs[is.na(snps)]
+    }
+    else if (use == "complete.obs") {
+        snps <- snps[rowSums(is.na(snps)) == 0, ]
+    }
+    n <- ncol(snps)
+    K <- matrix(nrow = n, ncol = n)
+    diag(K) <- 1
+    for (i in 2:n) {
+        for (j in 1:(i - 1)) {
+            x <- snps[, i] * snps[, j] + (1 - snps[, i]) * (1 -
+                snps[, j])
+            K[i, j] <- sum(x, na.rm = TRUE)/sum(!is.na(x))
+            K[j, i] <- K[i, j]
+        }
+    }
+    return(K)
+}
+
+
+emma.eigen.L <- function (Z, K, complete = TRUE)
+{
+    if (is.null(Z)) {
+        return(emma.eigen.L.wo.Z(K))
+    }
+    else {
+        return(emma.eigen.L.w.Z(Z, K, complete))
+    }
+}
+
+emma.eigen.L.wo.Z <- function (K)
+{
+    eig <- eigen(K, symmetric = TRUE)
+    return(list(values = eig$values, vectors = eig$vectors))
+}
+
+
+emma.eigen.R.wo.Z <- function (K, X)
+{
+    n <- nrow(X)
+    q <- ncol(X)
+    S <- diag(n) - X %*% solve(crossprod(X, X)) %*% t(X)
+    eig <- eigen(S %*% (K + diag(1, n)) %*% S, symmetric = TRUE)
+    stopifnot(!is.complex(eig$values))
+    return(list(values = eig$values[1:(n - q)] - 1, vectors = eig$vectors[,
+        1:(n - q)]))
+}
+
+emma.eigen.R <- function(Z,K,X,complete=TRUE) {
+  if ( is.null(Z) ) {
+    return(emma.eigen.R.wo.Z(K,X))
+  }
+  else {
+    return(emma.eigen.R.w.Z(Z,K,X,complete))
+  }
+}
+
+emma.REMLE <- function (y, X, K, Z = NULL, ngrids = 100, llim = -10, ulim = 10,
+    esp = 1e-10, eig.L = NULL, eig.R = NULL)
+{
+    n <- length(y)
+    t <- nrow(K)
+    q <- ncol(X)
+    stopifnot(ncol(K) == t)
+    stopifnot(nrow(X) == n)
+    if (det(crossprod(X, X)) == 0) {
+        warning("X is singular")
+        return(list(REML = 0, delta = 0, ve = 0, vg = 0))
+    }
+    if (is.null(Z)) {
+        if (is.null(eig.R)) {
+            eig.R <- emma.eigen.R.wo.Z(K, X)
+        }
+        etas <- crossprod(eig.R$vectors, y)
+        logdelta <- (0:ngrids)/ngrids * (ulim - llim) + llim
+        m <- length(logdelta)
+        delta <- exp(logdelta)
+        Lambdas <- matrix(eig.R$values, n - q, m) + matrix(delta,
+            n - q, m, byrow = TRUE)
+        Etasq <- matrix(etas * etas, n - q, m)
+        LL <- 0.5 * ((n - q) * (log((n - q)/(2 * pi)) - 1 - log(colSums(Etasq/Lambdas))) -
+            colSums(log(Lambdas)))
+        dLL <- 0.5 * delta * ((n - q) * colSums(Etasq/(Lambdas *
+            Lambdas))/colSums(Etasq/Lambdas) - colSums(1/Lambdas))
+        optlogdelta <- vector(length = 0)
+        optLL <- vector(length = 0)
+        if (dLL[1] < esp) {
+            optlogdelta <- append(optlogdelta, llim)
+            optLL <- append(optLL, emma.delta.REML.LL.wo.Z(llim,
+                eig.R$values, etas))
+        }
+        if (dLL[m - 1] > 0 - esp) {
+            optlogdelta <- append(optlogdelta, ulim)
+            optLL <- append(optLL, emma.delta.REML.LL.wo.Z(ulim,
+                eig.R$values, etas))
+        }
+        for (i in 1:(m - 1)) {
+            if ((dLL[i] * dLL[i + 1] < 0 - esp * esp) && (dLL[i] >
+                0) && (dLL[i + 1] < 0)) {
+                r <- uniroot(emma.delta.REML.dLL.wo.Z, lower = logdelta[i],
+                  upper = logdelta[i + 1], lambda = eig.R$values,
+                  etas = etas)
+                optlogdelta <- append(optlogdelta, r$root)
+                optLL <- append(optLL, emma.delta.REML.LL.wo.Z(r$root,
+                  eig.R$values, etas))
+            }
+        }
+    }
+    else {
+        if (is.null(eig.R)) {
+            eig.R <- emma.eigen.R.w.Z(Z, K, X)
+        }
+        etas <- crossprod(eig.R$vectors, y)
+        etas.1 <- etas[1:(t - q)]
+        etas.2 <- etas[(t - q + 1):(n - q)]
+        etas.2.sq <- sum(etas.2 * etas.2)
+        logdelta <- (0:ngrids)/ngrids * (ulim - llim) + llim
+        m <- length(logdelta)
+        delta <- exp(logdelta)
+        Lambdas <- matrix(eig.R$values, t - q, m) + matrix(delta,
+            t - q, m, byrow = TRUE)
+        Etasq <- matrix(etas.1 * etas.1, t - q, m)
+        dLL <- 0.5 * delta * ((n - q) * (colSums(Etasq/(Lambdas *
+            Lambdas)) + etas.2.sq/(delta * delta))/(colSums(Etasq/Lambdas) +
+            etas.2.sq/delta) - (colSums(1/Lambdas) + (n - t)/delta))
+        optlogdelta <- vector(length = 0)
+        optLL <- vector(length = 0)
+        if (dLL[1] < esp) {
+            optlogdelta <- append(optlogdelta, llim)
+            optLL <- append(optLL, emma.delta.REML.LL.w.Z(llim,
+                eig.R$values, etas.1, n, t, etas.2.sq))
+        }
+        if (dLL[m - 1] > 0 - esp) {
+            optlogdelta <- append(optlogdelta, ulim)
+            optLL <- append(optLL, emma.delta.REML.LL.w.Z(ulim,
+                eig.R$values, etas.1, n, t, etas.2.sq))
+        }
+        for (i in 1:(m - 1)) {
+            if ((dLL[i] * dLL[i + 1] < 0 - esp * esp) && (dLL[i] >
+                0) && (dLL[i + 1] < 0)) {
+                r <- uniroot(emma.delta.REML.dLL.w.Z, lower = logdelta[i],
+                  upper = logdelta[i + 1], lambda = eig.R$values,
+                  etas.1 = etas.1, n = n, t1 = t, etas.2.sq = etas.2.sq)
+                optlogdelta <- append(optlogdelta, r$root)
+                optLL <- append(optLL, emma.delta.REML.LL.w.Z(r$root,
+                  eig.R$values, etas.1, n, t, etas.2.sq))
+            }
+        }
+    }
+    maxdelta <- exp(optlogdelta[which.max(optLL)])
+    maxLL <- max(optLL)
+    if (is.null(Z)) {
+        maxva <- sum(etas * etas/(eig.R$values + maxdelta))/(n -
+            q)
+    }
+    else {
+        maxva <- (sum(etas.1 * etas.1/(eig.R$values + maxdelta)) +
+            etas.2.sq/maxdelta)/(n - q)
+    }
+    maxve <- maxva * maxdelta
+    return(list(REML = maxLL, delta = maxdelta, ve = maxve, vg = maxva))
+}
+
+
+emma.delta.REML.LL.wo.Z <- function (logdelta, lambda, etas) 
+{
+    nq <- length(etas)
+    delta <- exp(logdelta)
+    return(0.5 * (nq * (log(nq/(2 * pi)) - 1 - log(sum(etas * 
+        etas/(lambda + delta)))) - sum(log(lambda + delta))))
+}
+
+
+emma.delta.REML.dLL.wo.Z <- function (logdelta, lambda, etas) 
+{
+    nq <- length(etas)
+    delta <- exp(logdelta)
+    etasq <- etas * etas
+    ldelta <- lambda + delta
+    return(0.5 * (nq * sum(etasq/(ldelta * ldelta))/sum(etasq/ldelta) - 
+        sum(1/ldelta)))
+}
\ No newline at end of file
-- 
GitLab