From 47470f76d11d2b31923c200dc7590aa09a729eb6 Mon Sep 17 00:00:00 2001
From: Sterken <mark.sterken@wur.nl>
Date: Wed, 2 Aug 2023 21:59:58 +0200
Subject: [PATCH] added all custom functions

---
 Function_scripts/H2.broad.R                   | 150 ++++++++++++++++++
 Function_scripts/NEMA.get.functions.R         |  54 +++++++
 Function_scripts/QTL.data.prep.R              |  73 +++++++++
 Function_scripts/QTL.eQTL.call.TBs.R          |  60 +++++++
 Function_scripts/QTL.eQTL.table.R             |  53 +++++++
 Function_scripts/QTL.eQTL.table.addTB.R       |  77 +++++++++
 Function_scripts/QTL.map.1.FDR.R              |  92 +++++++++++
 Function_scripts/QTL.map.1.R                  |  87 ++++++++++
 Function_scripts/QTL.map.1.perm.R             |  85 ++++++++++
 Function_scripts/QTL.map.1.sim.R              | 124 +++++++++++++++
 Function_scripts/QTL.map1.dataframe.R         |  58 +++++++
 Function_scripts/QTL.peak.finder.R            |  60 +++++++
 Function_scripts/QTL.sim.data.per.marker.R    |  67 ++++++++
 Function_scripts/QTL.table.addR2.R            |  54 +++++++
 Function_scripts/R.sq.R                       |  15 ++
 Function_scripts/enrich.easy.R                |  52 ++++++
 Function_scripts/enrich.get.geneIDs.R         |  32 ++++
 Function_scripts/enrich.more.R                |  28 ++++
 Function_scripts/enrich.one.R                 |  35 ++++
 Function_scripts/fast.lm.1.R                  |  21 +++
 Function_scripts/fast.summ.R                  |  33 ++++
 Function_scripts/fast.summ.transcriptomics.R  |  33 ++++
 Function_scripts/genetic.df.to.mat.R          |  67 ++++++++
 Function_scripts/genetic.distance.R           |  81 ++++++++++
 .../genetic.informative.markers.R             |  27 ++++
 Function_scripts/lm.1.R                       |  29 ++++
 Function_scripts/permutate.traits.R           |  20 +++
 Function_scripts/plot.genotype.strain.R       | 108 +++++++++++++
 Function_scripts/prep.ggplot.QTL.profile.R    |  20 +++
 Function_scripts/prep.ggplot.eQTL.table.R     |  36 +++++
 Function_scripts/prep.ggplot.genetic.map.R    |  52 ++++++
 .../transcriptomics.age.predictor.R           |  49 ++++++
 Function_scripts/transcriptomics.df.to.list.R |  48 ++++++
 Function_scripts/transcriptomics.lm.R         |  88 ++++++++++
 Function_scripts/transgression.calc.R         |  73 +++++++++
 mainscript_aS_eQTL.R                          |   4 +-
 36 files changed, 2043 insertions(+), 2 deletions(-)
 create mode 100644 Function_scripts/H2.broad.R
 create mode 100644 Function_scripts/NEMA.get.functions.R
 create mode 100644 Function_scripts/QTL.data.prep.R
 create mode 100644 Function_scripts/QTL.eQTL.call.TBs.R
 create mode 100644 Function_scripts/QTL.eQTL.table.R
 create mode 100644 Function_scripts/QTL.eQTL.table.addTB.R
 create mode 100644 Function_scripts/QTL.map.1.FDR.R
 create mode 100644 Function_scripts/QTL.map.1.R
 create mode 100644 Function_scripts/QTL.map.1.perm.R
 create mode 100644 Function_scripts/QTL.map.1.sim.R
 create mode 100644 Function_scripts/QTL.map1.dataframe.R
 create mode 100644 Function_scripts/QTL.peak.finder.R
 create mode 100644 Function_scripts/QTL.sim.data.per.marker.R
 create mode 100644 Function_scripts/QTL.table.addR2.R
 create mode 100644 Function_scripts/R.sq.R
 create mode 100644 Function_scripts/enrich.easy.R
 create mode 100644 Function_scripts/enrich.get.geneIDs.R
 create mode 100644 Function_scripts/enrich.more.R
 create mode 100644 Function_scripts/enrich.one.R
 create mode 100644 Function_scripts/fast.lm.1.R
 create mode 100644 Function_scripts/fast.summ.R
 create mode 100644 Function_scripts/fast.summ.transcriptomics.R
 create mode 100644 Function_scripts/genetic.df.to.mat.R
 create mode 100644 Function_scripts/genetic.distance.R
 create mode 100644 Function_scripts/genetic.informative.markers.R
 create mode 100644 Function_scripts/lm.1.R
 create mode 100644 Function_scripts/permutate.traits.R
 create mode 100644 Function_scripts/plot.genotype.strain.R
 create mode 100644 Function_scripts/prep.ggplot.QTL.profile.R
 create mode 100644 Function_scripts/prep.ggplot.eQTL.table.R
 create mode 100644 Function_scripts/prep.ggplot.genetic.map.R
 create mode 100644 Function_scripts/transcriptomics.age.predictor.R
 create mode 100644 Function_scripts/transcriptomics.df.to.list.R
 create mode 100644 Function_scripts/transcriptomics.lm.R
 create mode 100644 Function_scripts/transgression.calc.R

diff --git a/Function_scripts/H2.broad.R b/Function_scripts/H2.broad.R
new file mode 100644
index 0000000..9ef6160
--- /dev/null
+++ b/Function_scripts/H2.broad.R
@@ -0,0 +1,150 @@
+###function for broad-sense heritability
+#currently 3 methods are supported: 
+#   ANOVA (very basic method) [ref?]
+#   REML (based on lme4 package) [ref?]
+#   Keurentjes_2007 (based on Brem, 2005 and Keurentjes, 2007)
+#   all methods support permutation
+
+###input
+#trait.value: a vector with trait values
+#strain: a vector with strain names matching the trait values
+#trait: a vector with trait names (e.g you can enter multiple traits this way)
+#method: one of three methods: ANOVA, REML, or Keurentjes_2007 (standard is ANOVA because it's quick)
+#parental.strain: names of parental strains (only for Keurentjes; otherwise parental strains should be removed from the analysis)
+#n.perm: number of permutations to be done (standard 1000)
+#Vg.factor: fraction of the measured genotypic variance should be counted towards total variance (standard is 1)
+
+###output
+#a dataframe with traits per row and columns with the heritability, underlying values and an FDR0.05 threshold
+
+###update
+#made a function that determines the fdr based on the actual value (not only if it's significant anymore)
+
+    H2.broad <- function(trait.value,strain,trait,method,parental.strain,n.perm,Vg.factor){
+                         if(missing(trait.value)){                                  stop("requires trait value")}
+                         if(missing(strain)){                                       stop("requires strain values")}
+                         if(missing(trait)){                                        trait <- rep("A",length(trait.value))}
+                         if(missing(method)){                                       method <- "ANOVA"}
+                         if(!method %in% c("ANOVA","REML","Keurentjes_2007")){      stop("method should be either ANOVA, REML, or Keurentjes_2007")}
+                         if(method=="Keurentjes_2007"){
+                             if(missing(parental.strain)){                          stop("when using Keurentjes_2007, parental strain names should be provided")}
+                             if(sum(parental.strain %in% strain) != length(parental.strain)){
+                                                                                    stop("not all parental strains are in the data")}
+                         }
+                         if(missing(n.perm)){                                       n.perm <- 1000}
+                         if(missing(Vg.factor)){                                    Vg.factor <- 1}
+
+      
+                          ###calculates H2 based on REML (part of H2.broad)
+                          H2.REML <- function(input,Vg.factor){
+                                              if(missing(Vg.factor)){Vg.factor <- 1}
+                                              
+                                              ##test if lme4 package is installed
+                                              if(!require("lme4")){
+                                                  install.packages("lme4")
+                                                  library("lme4")
+                                              }
+                            
+                                              REML <- lmer(trait.value ~ 1 + (1|strain),data=input)
+                                              Variations <- as.data.frame(VarCorr(REML))$vcov
+                          
+                                              H2 <- c((Variations[1]*Vg.factor)/sum(Variations*c(Vg.factor,1)),(Variations*c(Vg.factor,1)))
+                                                  names(H2) <- c("H2_REML","Vg","Ve")
+                                              return(H2)
+                                             }
+                      
+                          ###Calculates H2 based on ANOVA (part of H2.broad)  
+                          H2.ANOVA <- function(input,Vg.factor){
+                                               if(missing(Vg.factor)){Vg.factor <- 1}
+                            
+                                               ANOVA <- anova(lm(trait.value~strain,data=input))
+                                               Variations <- ANOVA[[2]]
+                          
+                                               H2 <- c((Variations[1]*Vg.factor)/sum(Variations*c(Vg.factor,1)),(Variations*c(Vg.factor,1)))
+                                                   names(H2) <- c("H2_ANOVA","Vg","Ve")
+                                               return(H2)
+                                              }
+                      
+                          ###Calculates H2 as described in Keurentjes, 2007, in case error has to be estimated on limited number of strains
+                          ###Permutation is based on Brem, 2005
+                          H2.Keurentjes <- function(input,parental.strain,Vg.factor){
+                                                    if(missing(Vg.factor)){Vg.factor <- 1}
+                            
+                                                    selc.parental <- input$strain %in% parental.strain
+                          
+                                                    ###Ve is estimated based on parental replicates
+                                                    VAR.PL <- tapply(input$trait.value[selc.parental],as.character(unlist(input$strain[selc.parental])),var)
+                                                    N.PL <- tapply(input$trait.value[selc.parental],as.character(unlist(input$strain[selc.parental])),length)
+                                                    VAR.E <- sum(VAR.PL*(N.PL-1))/sum((N.PL-1))
+                          
+                                                    ##Vt is estimated based on RILs
+                                                    VAR.T <- var(input$trait.value[!selc.parental])
+                          
+                                                    H2 <- c(((VAR.T-VAR.E)*Vg.factor)/VAR.T,((VAR.T-VAR.E)*Vg.factor),VAR.E)
+                                                        names(H2) <- c("H2_keurentjes","Vg","Ve")
+                                                    return(H2)
+                                                   }      
+      
+                         permfunct <- function(x,val){
+                                               out <- NULL
+                                               for(j in 1:ncol(x)){
+                                                   out <- c(out,max(which(rev(sort(c(val[j],x[,j])) >=val[j])))/(length(x[,j])+1))
+                                               }  
+                                               return(out)
+                                              }  
+                          
+                         ###Prep the data
+                         input <- data.frame(cbind(trait.value=trait.value,strain=strain))
+                             input$trait.value <- as.numeric(as.character(unlist(input$trait.value)))
+                             input$strain <- as.factor(as.character(unlist(input$strain)))
+
+                         input <- split(input,trait)
+
+                         if(method == "ANOVA"){
+                             H2 <- t(sapply(input,H2.ANOVA,Vg.factor))
+
+                             perm <- NULL
+                             for(i in 1:n.perm){
+                                 input.perm <- lapply(input,function(x){x[,1] <- x[order(runif(nrow(x))),1];return(x)})
+                                 H2.perm <- t(sapply(input.perm,H2.ANOVA,Vg.factor))
+                                 perm <- rbind(perm,H2.perm[,1])
+                             }
+
+                             H2 <- cbind(H2,FDR=permfunct(perm,H2[,1]))
+                         }
+                         if(method == "REML"){
+                             H2 <- t(sapply(input,H2.REML,Vg.factor))
+
+                             perm <- NULL
+                             for(i in 1:n.perm){
+                                 input.perm <- lapply(input,function(x){x[,1] <- x[order(runif(nrow(x))),1];return(x)})
+                                 H2.perm <- t(sapply(input.perm,H2.REML,Vg.factor))
+                                 perm <- rbind(perm,H2.perm[,1])
+                             }
+
+
+                             H2 <- cbind(H2,FDR=permfunct(perm,H2[,1]))
+                         }
+                         if(method == "Keurentjes_2007"){
+                             H2 <- t(sapply(input,H2.Keurentjes,parental.strain = parental.strain,Vg.factor))
+                             
+                             perm <- NULL
+                             for(i in 1:n.perm){
+                                 input.perm <- lapply(input,function(x){x[,1] <- x[order(runif(nrow(x))),1];return(x)})
+                                 H2.perm <- t(sapply(input.perm,H2.Keurentjes,parental.strain = parental.strain,Vg.factor))
+                                 perm <- rbind(perm,H2.perm[,1])
+                             }                             
+                             
+                             H2 <- cbind(H2,FDR=permfunct(perm,H2[,1]))
+                         }
+
+                         output <- as.data.frame(cbind(trait=names(input),H2))
+                             output[,1] <- as.character(unlist(output[,1]))
+                             output[,2] <- round(as.numeric(as.character(unlist(output[,2]))),digits=3)
+                             output[,3] <- round(as.numeric(as.character(unlist(output[,3]))),digits=3)
+                             output[,4] <- round(as.numeric(as.character(unlist(output[,4]))),digits=3)
+                             output[,5] <- as.numeric(as.character(unlist(output[,5])))
+
+                         return(output)
+                        }
+
diff --git a/Function_scripts/NEMA.get.functions.R b/Function_scripts/NEMA.get.functions.R
new file mode 100644
index 0000000..233d2f6
--- /dev/null
+++ b/Function_scripts/NEMA.get.functions.R
@@ -0,0 +1,54 @@
+###Writes all custom NEMA_functions scripts to a specified folder
+
+###input
+#file_script (script of which you want to get the NEMA_functions from)
+#NEMA_functions_dir (location of the nema functions)
+#write_dir (location where to write to, standard is current wd to a folder called "Function_scripts")
+
+###output
+#a folder with functions
+
+###Description
+
+NEMA.get.functions <- function(file_script,NEMA_functions_dir,write_dir="Function_scripts"){
+                               if(missing(file_script)){               stop("requires a source script")}
+                               if(missing(NEMA_functions_dir)){        NEMA_functions_dir <- "E:/Nemawork/Projects_R_zone/Git/NEMA_functions"}
+
+                               ###get write_dir
+                                   dir.create("Function_scripts", showWarnings = F)
+                                   write_dir <- paste(getwd(),write_dir,sep="/")
+  
+                               ####compile a list of the nema functions as currently exist
+                                   functionlist <- dir(paste(NEMA_functions_dir,"/R/",sep=""))
+                               ###remove the .R
+                                   functionlist <- gsub(".R$","",functionlist)
+                               
+                               ###load source script
+                                   sourcescript <- read.table(file = file_script,sep="\n",comment.char="",blank.lines.skip = FALSE,quote="")
+                               
+                               ###search in source script
+                                   output <- NULL
+                                   for(i in 1:length(functionlist)){
+                                       output <- c(output,grepl(functionlist[i],sourcescript))
+                                   }
+                               
+                               ###Go through five times for nested functions
+                               ###load source script
+                                   for(k in 1:5){
+                                       for(j in which(output)){
+                                           sourcescript <- read.table(file = paste(NEMA_functions_dir,"/R/",functionlist[j],".R",sep=""),sep="\n",comment.char="",blank.lines.skip = FALSE,quote="")
+                                           
+                                           ###search in source script
+                                           for(i in 1:length(functionlist)){
+                                               if(grepl(functionlist[i],sourcescript)){
+                                                   output[i] <- TRUE
+                                               }
+                                           }
+                                       }
+                                   }
+                               
+                               ###copy functions to folder
+                               for(i in which(output)){
+                                   file.copy(from = paste(NEMA_functions_dir,"/R/",functionlist[i],".R",sep=""), to = write_dir) 
+                               }                        
+}
diff --git a/Function_scripts/QTL.data.prep.R b/Function_scripts/QTL.data.prep.R
new file mode 100644
index 0000000..8e6b570
--- /dev/null
+++ b/Function_scripts/QTL.data.prep.R
@@ -0,0 +1,73 @@
+###Data preparation for QTL mapping; start function for most of subsequent functions
+
+###input
+#trait.matrix (matrix; traits per row and strains per column)
+#strain.trait (vector with strain names per trait. Should match strain.map)
+#strain.map (markers in rows, genotypes in columns)
+#strain.marker (markers in rows, with columns name, chromosome and position)
+
+###output
+#a list witht the entries Trait, Map, and Marker. Where the Traits are aligned with the map
+
+###Description
+#Arranges the trait and the map data so they are aligned and runs some basic checks on the input data
+#removes the non-present strains it warns about
+
+###See also
+#QTL.map.1
+
+
+
+###Data preparation
+    QTL.data.prep <- function(trait.matrix,strain.trait,strain.map,strain.marker){
+                              if(missing(trait.matrix)){                             stop("requires trait data, traits per row and strains per column")}
+                              if(missing(strain.trait)){                             stop("requires the strain names matching the trait data")}
+                              if(missing(strain.map)){                               stop("requires the genetic map of the population")}
+                              if(missing(strain.marker)){                            stop("requires the marker info (name, chromosome, basepair) of the map")}
+                              if(ncol(trait.matrix) != length(strain.trait)){        stop("the number of strains does not correspond with the trait matrix")}
+                              if(sum(tolower(strain.trait) %in% tolower(colnames(strain.map))) < length(strain.trait)){
+                                                                                     warning(paste("The strains: ",paste(strain.trait[!tolower(strain.trait) %in% tolower(colnames(strain.map))],collapse=", ")," are not in the strain.map file"))
+                              }
+                              if(nrow(strain.map) != nrow(strain.marker)){           stop("the number of markers (strain.map) does not equal the number of marker descriptions (strain.marker)")}
+                              if(ncol(strain.marker) != 3){                          stop("make sure the strain.marker file contains a column with the marker name, chromosome, and bp location")}
+                              if(nrow(trait.matrix)==1){                             small <- TRUE}
+                              if(nrow(trait.matrix)!=1){                             small <- FALSE}
+                              
+                              strain.map <- data.matrix(strain.map)
+
+                              ###remove strains not present
+                              if(small){
+                                  rowna <- rownames(trait.matrix)
+                                  trait.matrix <- trait.matrix[1,which(tolower(strain.trait) %in% tolower(colnames(strain.map)))]
+                                  colna <- names(trait.matrix)
+                                  trait.matrix <- t(matrix(trait.matrix))
+                                      colnames(trait.matrix) <- colna
+                                      rownames(trait.matrix) <- rowna
+                              } else {
+                                  trait.matrix <- trait.matrix[,which(tolower(strain.trait) %in% tolower(colnames(strain.map)))]
+                                  strain.trait <- strain.trait[tolower(strain.trait) %in% tolower(colnames(strain.map))]
+                              }
+                                  
+                              ###Make matrix
+                              map.nu <- matrix(NA,nrow=nrow(strain.map),ncol=length(strain.trait))
+                              for(j in 1:length(strain.trait)){
+                                  map.nu[,j] <- strain.map[,tolower(colnames(strain.map)) == tolower(strain.trait[j])]
+                              }
+                              colnames(map.nu) <- strain.trait
+                              rownames(map.nu) <- rownames(strain.map)
+
+                              ###add rownames to the trait matrix
+                              if(length(rownames(trait.matrix)) == 0){
+                                  rownames(trait.matrix) <- 1:nrow(trait.matrix)
+                              }
+
+                              ###Make output
+                              output <- NULL
+                              output[[1]] <- trait.matrix
+                              output[[2]] <- map.nu
+                              output[[3]] <- data.frame(strain.marker)
+                              names(output) <- c("Trait","Map","Marker")
+
+                              ###return output
+                              return(output)
+                             }
\ No newline at end of file
diff --git a/Function_scripts/QTL.eQTL.call.TBs.R b/Function_scripts/QTL.eQTL.call.TBs.R
new file mode 100644
index 0000000..e64b308
--- /dev/null
+++ b/Function_scripts/QTL.eQTL.call.TBs.R
@@ -0,0 +1,60 @@
+###Calls transbands based on a poisson distribution
+#Brem, Rachel B., and Leonid Kruglyak. "The landscape of genetic complexity across 5,700 gene expression traits in yeast." Proceedings of the National Academy of Sciences 102.5 (2005): 1572-1577.
+
+###input
+#eQTL.table.file (output from QTL.eQTL.table)
+#window (interval size in which QTL are counted)
+#chromosomes (names of the chromosomes; standard is C. elegans)
+#chromosome_size (sizes of the chromosomes; standard is C. elegans)
+
+###output
+#a dataframe with sifnificances for cis- and trans-eQTL enrichments
+
+###Description
+
+
+
+QTL.eQTL.call.TBs <- function(eQTL.table.file,window,chromosomes,chromosome_size){
+                               if(missing(eQTL.table.file)){                        stop("need a eQTL.table.file")}
+                               if(missing(window)){                                 window <- 1.0e6; warning("set default window-size, might not be optimal")}
+                               if(missing(chromosomes)){                            chromosomes <- c("I","II","III","IV","V","X")}
+                               if(missing(chromosome_size)){                        chromosome_size <- c(15072434,15279421,13783801,17493829,20924180,17718942)}
+                               if(length(chromosomes) != length(chromosome_size)){  stop("chromosome names and sizes should have the same length")}
+
+                               maxsize <- max(chromosome_size+window)  
+                               chrnum <- length(chromosomes)
+    
+                               ###call locations with many eQTL
+                               transband.id <- dplyr::mutate(eQTL.table.file,Interval=findInterval(qtl_bp,seq(1,maxsize,by=window))) %>%
+                                               dplyr::filter(!is.na(qtl_type)) %>%
+                                               dplyr::group_by(qtl_chromosome,Interval,qtl_type) %>%
+                                               dplyr::summarise(n.ct=length(unique(trait))) %>%
+                                               data.frame() %>%
+                                               dplyr::group_by(qtl_type) %>%
+                                               dplyr::mutate(exp.ct=mean(as.numeric(unlist(n.ct)))) %>%
+                                               data.frame() %>%
+                                               dplyr::mutate(transband_significance=ppois(n.ct,lambda=exp.ct,lower.tail=F))
+                                
+                               ###Add locations that have no cis/trans eQTL
+                               transband.id <- rbind(transband.id,data.frame(cbind(qtl_chromosome=rep(chromosomes,each=ceiling(maxsize/window)*2),Interval=rep(1:ceiling(maxsize/window),times=chrnum*2),
+                                                                                   qtl_type=rep(rep(c("cis","trans"),each=ceiling(maxsize/window)),times=chrnum),
+                                                                                   n.ct=0,
+                                                                                   exp.ct=rep(rep(c(filter(transband.id,!duplicated(exp.ct),qtl_type=="cis")$exp.ct,filter(transband.id,!duplicated(exp.ct),qtl_type=="trans")$exp.ct),each=ceiling(maxsize/window)),times=chrnum),
+                                                                                   transband_significance=1))) %>%
+                                               dplyr::mutate(tester=paste(qtl_chromosome,Interval,qtl_type)) %>%
+                                               dplyr::filter(!duplicated(tester)) %>%
+                                               dplyr::mutate(Interval=as.numeric(as.character(unlist(Interval))),transband_significance=as.numeric(transband_significance)) %>%
+                                               dplyr::mutate(Interval_left=(Interval-1)*window,Interval_right=1+Interval*window) %>%
+                                               dplyr::arrange(qtl_type,qtl_chromosome,Interval)
+                               
+                               ###resize to chromosome sizes
+                               output <- NULL
+                               for(i in 1:chrnum){
+                                   output <- rbind(output,filter(transband.id,qtl_chromosome==chromosomes[i],Interval_right<(chromosome_size[i]+window)))
+                               }
+                               for(i in c(2,4,5,6,8,9)){
+                                   output[,i] <- as.numeric(as.character(unlist(output[,i])))
+                               }
+                               
+                               return(output)
+                              }
\ No newline at end of file
diff --git a/Function_scripts/QTL.eQTL.table.R b/Function_scripts/QTL.eQTL.table.R
new file mode 100644
index 0000000..74c4f8c
--- /dev/null
+++ b/Function_scripts/QTL.eQTL.table.R
@@ -0,0 +1,53 @@
+###function for making an eQTL table
+
+###input
+#QTL.peak.dataframe (output from QTL.peak.finder function)
+#trait.annotation (dataframe with columns: trait, gene_chromosome, gene_bp, gene_database_ID, gene_name_1, gene_name_2)
+#cis.trans.window (how cis and trans eQTL are distinguised: physical, LOD.drop, or both)
+#cis.trans.window.size (physical window size, standard 1000000)
+#colnames.trait.annotations (names for the trait annotations added)
+
+
+###output
+#a dataframe listing the eQTL per trait.
+
+###Description    
+# Makes a table with eQTL peaks v1.1
+# QTL.peak.dataframe: output from peak.finder
+# trait.annotation: gene ID's of the mapped traits
+# cis.trans.window: Cis-trans windowd can be defined based on physical distance "physical", based on LOD-drop interval "LOD.drop", or on both "both"
+    
+QTL.eQTL.table <- function(QTL.peak.dataframe, trait.annotation,cis.trans.window,cis.trans.window.size,colnames.trait.annotations){
+                           if(missing(QTL.peak.dataframe)){                                  stop("need an eQTL list file")}
+                           if(missing(trait.annotation)){                                    stop("need annotation file with order: trait, gene_chromosome, gene_bp, gene_WBID, gene_sequence_name, gene_public_name")}
+                           if(missing(cis.trans.window)){                                    cis.trans.window <- "physical"} 
+                           if(!cis.trans.window %in% c("physical","LOD.drop","both")){       stop("cis.trans.window should be in physical, LOD.drop, or both")}
+                           if(missing(cis.trans.window.size)){                               cis.trans.window.size <- 1000000}
+                           if(missing(colnames.trait.annotations)){                          colnames.trait.annotations <- c("trait","gene_chromosome","gene_bp","gene_WBID","gene_sequence_name","gene_public_name")}
+
+                           ###Rename colnames traits
+                           colnames(trait.annotation) <- colnames.trait.annotations
+
+                           ###Take only the peaks
+                           if(cis.trans.window =="physical"){
+                               output <- dplyr::filter(QTL.peak.dataframe,!is.na(qtl_peak)) %>%
+                                         merge(trait.annotation,by.x=1,by.y=1) %>%
+                                         dplyr::mutate(qtl_type = ifelse((qtl_chromosome==gene_chromosome & abs(qtl_bp-gene_bp) < cis.trans.window.size),"cis","trans"))
+                           }
+                      
+                           if(cis.trans.window =="LOD.drop"){
+                               output <- dplyr::filter(QTL.peak.dataframe,!is.na(qtl_peak)) %>%
+                                         merge(trait.annotation,by.x=1,by.y=1) %>%
+                                         dplyr::mutate(qtl_type = ifelse((qtl_chromosome==gene_chromosome & (gene_bp > qtl_bp_left & gene_bp < qtl_bp_right)),"cis","trans"))
+                           }
+                           
+                           if(cis.trans.window =="both"){
+                               output <- dplyr::filter(QTL.peak.dataframe,!is.na(qtl_peak)) %>%
+                                         merge(trait.annotation,by.x=1,by.y=1) %>%
+                                         dplyr::mutate(qtl_type = ifelse((qtl_chromosome==gene_chromosome & abs(qtl_bp-gene_bp) < cis.trans.window.size) |
+                                                                         ((qtl_chromosome==gene_chromosome & abs(qtl_bp-gene_bp) < cis.trans.window.size) & (gene_bp > qtl_bp_left & gene_bp < qtl_bp_right)),"cis","trans"))
+                           }
+                           
+                           ###Return file
+                           return(output)
+                          }
diff --git a/Function_scripts/QTL.eQTL.table.addTB.R b/Function_scripts/QTL.eQTL.table.addTB.R
new file mode 100644
index 0000000..5b08e98
--- /dev/null
+++ b/Function_scripts/QTL.eQTL.table.addTB.R
@@ -0,0 +1,77 @@
+###Adds whether a QTL is located in a trans-band (or cis-enriched area)
+
+###input
+#eQTL.table.file (output from QTL.eQTL.table)
+#eQTL.call.TBs.file (output from QTL.eQTL.call.TBs)
+#transband_thr (statistical threshold for calling a trans-band)
+#qtl_type_select (type of qtl to call enrichment for)
+#merge_condition (how adjacent chromosomal intervals should be for merging trans-bands)
+#digits_Mb (amount of digits behind the Mb should appear in TB-name)
+
+###output
+#the eQTL.table.file with a trans_band column added
+
+###Description
+
+###version
+#Fixed issue with merging transbands
+
+QTL.eQTL.table.addTB <- function(eQTL.table.file,call.transbands.file,transband_thr,qtl_type_select,merge_condition,digits_Mb){                               
+                                   if(missing(eQTL.table.file)){                        stop("need a eQTL.table.file")}
+                                   if(missing(call.transbands.file)){                   stop("need a call.transbands.file")}
+                                   if(missing(transband_thr)){                          transband_thr <- 0.0001; warning("set default significance, might not be optimal")}
+                                   if(missing(qtl_type_select)){                        qtl_type_select <- "trans"}
+                                   if(missing(merge_condition)){                        merge_condition <- 2}
+                                   if(missing(digits_Mb)){                              digits_Mb <- 1}                
+                                    
+                                   transbands <- dplyr::filter(call.transbands.file,transband_significance < transband_thr,qtl_type==qtl_type_select) %>%
+                                                 dplyr::group_by(qtl_chromosome) %>%
+                                                 dplyr::mutate(mergeornot=c(0,diff(Interval))) %>%
+                                                 data.frame()
+                                   
+                                   transbands.merged <- NULL
+                                   for(i in 1:length(unique(transbands$qtl_chromosome))){
+                                       tmp <- dplyr::filter(transbands,qtl_chromosome == unique(transbands$qtl_chromosome)[i])
+                                       tmp2 <- NULL
+                                       if(nrow(tmp) > 1){
+                                           for(j in 2:nrow(tmp)){
+                                               if(tmp$mergeornot[j] <= merge_condition){
+                                                   tmp2 <- data.frame(rbind(tmp2,cbind(qtl_chromosome=tmp[j,1], qtl_type=tmp[j,3], n.ct=sum(tmp[c((j-1),j),4]), exp.ct=sum(tmp[c((j-1),j),5]), 
+                                                                                       transband_significance=max(tmp[j,6]), Interval_left=tmp[j-1,8], Interval_right=tmp[j,9])))
+                                                   for(k in c(3,4,5,6,7)){tmp2[,k] <- as.numeric(as.character(unlist(tmp2[,k])))}  
+                                               } else {
+                                                   if(j==2){
+                                                       tmp3 <- tmp[1:2,-c(2,7,10)]
+                                                       for(k in c(3,4,5,6,7)){tmp3[,k] <- as.numeric(as.character(unlist(tmp3[,k])))}  
+                                                       tmp2 <- rbind(tmp2,tmp3)
+                                                       for(k in c(3,4,5,6,7)){tmp2[,k] <- as.numeric(as.character(unlist(tmp2[,k])))}  
+                                                   } else {
+                                                       tmp3 <- tmp[j,-c(2,7,10)]
+                                                       for(k in c(3,4,5,6,7)){tmp3[,k] <- as.numeric(as.character(unlist(tmp3[,k])))}  
+                                                       tmp2 <- rbind(tmp2,tmp3)
+                                                       for(k in c(3,4,5,6,7)){tmp2[,k] <- as.numeric(as.character(unlist(tmp2[,k])))}  
+                                                   }
+                                               }
+                                           }
+                                       } else {
+                                           tmp2 <- tmp[,-c(2,7,10)]
+                                           for(k in c(3,4,5,6,7)){tmp2[,k] <- as.numeric(as.character(unlist(tmp2[,k])))}  
+                                       }
+                                       transbands.merged <- data.frame(rbind(transbands.merged,tmp2))
+                                       for(k in c(3,4,5,6,7)){transbands.merged[,k] <- as.numeric(as.character(unlist(transbands.merged[,k])))}  
+                                   }
+                                   for(k in c(1,2)){transbands.merged[,k] <- as.character(unlist(transbands.merged[,k]))}  
+                                   transbands.merged <- mutate(transbands.merged,Name=paste(qtl_chromosome,":",round(Interval_left/1e6,digits=digits_Mb),"-",round(Interval_right/1e6,digits=digits_Mb),"Mb",sep=""))
+                                   
+                                   output <- dplyr::mutate(eQTL.table.file,trans_band = "none")
+                                   
+                                   for(i in 1:nrow(transbands.merged)){
+                                        output[output$qtl_type == transbands.merged$qtl_type[i] &
+                                               output$qtl_chromosome == transbands.merged$qtl_chromosome[i] &
+                                               output$qtl_bp > transbands.merged$Interval_left[i] &
+                                               output$qtl_bp <= transbands.merged$Interval_right[i] &
+                                               !is.na(output$qtl_type) & !is.na(output$qtl_chromosome) & !is.na(output$qtl_bp),]$trans_band <- transbands.merged$Name[i]
+                                   }
+                                   
+                                   return(output)
+                                  }
diff --git a/Function_scripts/QTL.map.1.FDR.R b/Function_scripts/QTL.map.1.FDR.R
new file mode 100644
index 0000000..c587f75
--- /dev/null
+++ b/Function_scripts/QTL.map.1.FDR.R
@@ -0,0 +1,92 @@
+###FDR calculation function for single marker mapping
+#Snoek & Sterken, 2017; Sterken, 2017
+
+###input
+#map1.output (output from the QTL.map.1 function)
+#filenames.perm (files containing permutated data)
+#FDR_dir (location of the files with permutated data)
+#q.value (desired q-value)
+#small (logic; set to TRUE for a small set)
+
+###output
+#a list with names: LOD, Effect, Trait_perm, Map, and Marker.
+
+###Description
+# Based on Benjamini and Yakuteili multiple testing under dependency
+
+
+
+QTL.map.1.FDR <- function(map1.output,filenames.perm,FDR_dir,q.value,small){
+                         if(missing(map1.output)){        stop("Missing map1.output ([[LOD]],[[Effect]],[[Trait]],[[Map]],[[Marker]])")}
+                         if(missing(filenames.perm)){     stop("Need a vector with filenames for permutation")}
+                         if(missing(FDR_dir)){            FDR_dir <- getwd()}
+                         if(missing(q.value)){            q.value <- 0.025}
+                         if(missing(small)){              small <- FALSE}
+
+                         output <- as.list(NULL)
+
+                         output[[2]] <- matrix(0,ncol=length(filenames.perm),nrow=5000)
+                         rownames(output[[2]]) <- seq(0.1,500,by=0.1)
+                         colnames(output[[2]]) <- filenames.perm
+
+                         ###in case of one trait
+                         if(small){
+                             if(length(filenames.perm) > 1){
+                                 stop("needs one permutatation file, with n permutations")
+                             }
+                             tmp <- load(file=paste(FDR_dir,filenames.perm,sep=""))
+                             ###No NA's
+                             tmp <- get(tmp)[[1]]
+                             tmp[is.na(tmp)] <- 0.1
+                             
+                             ###no Non-traits
+                             tmp <- tmp[rownames(tmp) != "",]
+                             output[[2]] <- apply(tmp,1,max,na.rm=T)
+                             
+                             if((length(output[[2]]) * q.value) < 10){
+                                  warning(paste("inadvisable to calculate q=",q.value,"based on ",length(output[[2]])," permutations. Increase to at least ", ceiling(10/q.value)))
+                             }
+                             
+                             ###take the q-cutoff, *10 is to be consistent with eQTL methodology (is per 0.1)
+                             output[[1]] <- rev(sort(output[[2]]))[ceiling(q.value*length(output[[2]]))]*10
+                             
+                             ###again, to be consistent; 
+                             output[[3]] <- mean(output[[2]],na.rm = TRUE)
+                             
+                             ###real data
+                             output[[4]] <- max(map1.output$LOD,na.rm=TRUE)
+                             
+                             names(output) <- c("Significance_threshold","False_discoveries","False_discoveries_average","Real_discoveries")
+                         }
+                         if(!small){
+                             for(i in 1:length(filenames.perm)){
+                                 tmp <- load(file=paste(FDR_dir,filenames.perm[i],sep=""))
+                                 ###No NA's
+                                 tmp <- get(tmp)[[1]]
+                                 tmp[is.na(tmp)] <- 0.1
+
+                                 tmp <- table(apply(round(tmp,digits=1),1,max,na.rm=T))
+
+                                 output[[2]][(as.numeric(names(tmp))*10),i] <- tmp
+                                 output[[2]][,i] <- rev(cumsum(rev(output[[2]][,i])))
+                             }
+                             ###Take the average over permutations
+                             output[[3]] <- apply(output[[2]],1,mean)
+
+                             ###Calculate the real discoveries, no NA's
+                             tmp <- map1.output$LOD
+                             tmp[is.na(tmp)] <- 0.1
+
+                             RDS.tmp <- table(round(apply(tmp,1,max,na.rm=T),digits=1))
+                             RDS <- rep(0,times=5000); RDS[(as.numeric(names(RDS.tmp))*10)] <- RDS.tmp
+                             RDS <- rev(cumsum(rev(RDS)))
+
+                             output[[4]] <- RDS
+
+                             output[[1]] <- which(round(((dim(map1.output$LOD)[1]-RDS)/dim(map1.output$LOD)[1])*q.value*log10(dim(map1.output$LOD)[1]),digits=3)-round(output[[3]]/output[[4]],digits=3)>0)[1]
+                             names(output) <- c("Significance_threshold","False_discoveries","False_discoveries_average","Real_discoveries")
+                         }
+                         
+                         ###Return
+                         return(output)
+                        }
diff --git a/Function_scripts/QTL.map.1.R b/Function_scripts/QTL.map.1.R
new file mode 100644
index 0000000..958a1b9
--- /dev/null
+++ b/Function_scripts/QTL.map.1.R
@@ -0,0 +1,87 @@
+###function for single marker mapping
+#Snoek & Sterken, 2017; Sterken, 2017
+
+###input
+#trait.matrix (traits in rows, genotypes in columns)
+#strain.map (markers in rows, genotypes in columns)
+#strain.marker (markers in rows, with columns name, chromosome and position)
+
+###output
+#a list with names: LOD, Effect, Trait, Map, and Marker.
+
+###Description
+# map.1 is optimized for dataset in which many markers are un-informative
+# these markers are not mapped but the p-vals and effect are copied from the previous marker.
+
+QTL.map.1 <- function(trait.matrix,strain.map,strain.marker){
+                      if(missing(trait.matrix)){                                      stop("specify trait.matrix, matrix with traits in rows and genotypes in columns")}
+                      if(missing(strain.map)){                                        stop("specify strain.map (genetic map)")}
+                      if(ncol(strain.map) !=  dim(as.matrix(trait.matrix))[2] &
+                         ncol(strain.map) !=  prod(dim(as.matrix(trait.matrix)))){    stop("number of strains is not equal to number of genotypes in map")}
+                      if(missing(strain.marker)){                                     strain.marker <- data.frame(cbind(name=1:dim(strain.map)[1],chromosome=NA,bp=1:dim(strain.map)[1]))
+                                                                                      rownames(strain.marker) <- 1:dim(strain.map)[1]}
+
+                      ###Check for NAs
+                      NAs <- sum(is.na(trait.matrix))>0 | sum(is.na(strain.map))>0
+
+                      small <- FALSE
+                      if(dim(as.matrix(trait.matrix))[1] ==1){
+                          traitnames <- rownames(trait.matrix)
+                          trait.matrix <- rbind(trait.matrix,trait.matrix)
+                          rownames(trait.matrix) <- c("A","B")
+                          small <- TRUE
+                      }
+
+                      eff.out <- matrix(NA,nrow(trait.matrix),nrow(strain.map))
+                      pval.out <- matrix(NA,nrow(trait.matrix),nrow(strain.map))
+
+                      for(i in 1:nrow(strain.map)){
+                          noseg <- length(unique(strain.map[i,])) ==1
+                          if(noseg){
+                              output.tmp <- matrix(c(0,0),byrow=TRUE,ncol=2,nrow=nrow(trait.matrix))
+                          }
+                          if( i == 1 & !noseg){
+                              if(!NAs){output.tmp <- fast.lm.1(trait.matrix,strain.map[i,])}
+                              if(NAs){output.tmp <- lm.1(trait.matrix,strain.map[i,])}
+                          }
+                          if( i != 1 & sum(abs(strain.map[i-1,]-strain.map[i,]),na.rm=T) != 0 & !noseg){
+                              if(!NAs){output.tmp <- fast.lm.1(trait.matrix,strain.map[i,])}
+                              if(NAs){output.tmp <- lm.1(trait.matrix,strain.map[i,])}
+                          }
+                          if( i != 1 & sum(abs(strain.map[i-1,]-strain.map[i,]),na.rm=T) == 0 & !noseg){
+                              output.tmp <- output.tmp
+                          }
+                          eff.out[,i] <- output.tmp[,2]
+                          pval.out[,i] <- output.tmp[,1]
+                      }
+                      if(!small){
+                        colnames(eff.out) <- rownames(strain.marker)
+                        rownames(eff.out) <- rownames(trait.matrix)
+                        colnames(pval.out) <- rownames(strain.marker)
+                        rownames(pval.out) <- rownames(trait.matrix)
+                      }
+                      if(small){
+                          eff.out <- matrix(eff.out[1,],nrow=1)
+                              rownames(eff.out) <- traitnames
+                              colnames(eff.out) <- rownames(strain.map)
+                          pval.out <- matrix(pval.out[1,],nrow=1)
+                              rownames(pval.out) <- traitnames
+                              colnames(pval.out) <- rownames(strain.map)
+                          trait.matrix <- matrix(trait.matrix[1,],nrow=1)
+                              rownames(trait.matrix) <- traitnames
+                              colnames(trait.matrix) <- colnames(strain.map)
+                      }
+
+                      output <- NULL; output <- as.list(output)
+                      output[[1]] <- round(pval.out,digits=2)
+                      output[[2]] <- round(eff.out,digits=3)
+                      output[[3]] <- trait.matrix
+                      output[[4]] <- strain.map
+                      output[[5]] <- strain.marker
+                      names(output) <- c("LOD","Effect","Trait","Map","Marker")
+                      return(output)
+                     }
+
+
+
+
diff --git a/Function_scripts/QTL.map.1.perm.R b/Function_scripts/QTL.map.1.perm.R
new file mode 100644
index 0000000..5226111
--- /dev/null
+++ b/Function_scripts/QTL.map.1.perm.R
@@ -0,0 +1,85 @@
+###Permutation function for single marker mapping
+#Snoek & Sterken, 2017; Sterken, 2017
+
+###input
+#trait.matrix (traits in rows, genotypes in columns)
+#strain.map (markers in rows, genotypes in columns)
+#strain.marker (markers in rows, with columns name, chromosome and position)
+#n.perm (the number of times each trait should be permutated)
+
+###output
+#a list with names: LOD, Effect, Trait_perm, Map, and Marker.
+
+###Description
+# map.1 is optimized for dataset in which many markers are un-informative
+# these markers are not mapped but the p-vals and effect are copied from the previous marker.
+
+QTL.map.1.perm <- function(trait.matrix,strain.map,strain.marker,n.perm){
+                           if(missing(trait.matrix)){                                      stop("specify trait.matrix, matrix with traits in rows and genotypes in columns")}
+                           if(missing(strain.map)){                                        stop("specify strain.map (genetic map)")}
+                           if(ncol(strain.map) !=  dim(as.matrix(trait.matrix))[2] &
+                              ncol(strain.map) !=  prod(dim(as.matrix(trait.matrix)))){    stop("number of strains is not equal to number of genotypes in map")}
+                           if(missing(strain.marker)){                                     strain.marker <- data.frame(cbind(name=1:dim(strain.map)[1],chromosome=NA,bp=1:dim(strain.map)[1]))
+                                                                                           rownames(strain.marker) <- 1:dim(strain.map)[1]}
+                           if(missing(n.perm)){                                            n.perm <- 1000}
+                           if(n.perm == 1 & dim(as.matrix(trait.matrix))[2] ==1){          stop("cannot conduct 1 permutation on 1 trait, set n.perm > 1")}
+
+
+                           NAs <- sum(is.na(trait.matrix))>0 | sum(is.na(strain.map))>0
+
+                           small <- FALSE
+                           if(dim(as.matrix(trait.matrix))[1] ==1){
+                               traitnames <- rownames(trait.matrix)
+                               trait.matrix <- rbind(trait.matrix,trait.matrix)
+                               rownames(trait.matrix) <- c(traitnames,"B")
+                               small <- TRUE
+                           }
+
+                           traitnames <- rownames(trait.matrix)
+                           trait.matrix <- trait.matrix[rep(1:nrow(trait.matrix),each=n.perm),]
+                           trait.matrix <- permutate.traits(trait.matrix)
+                           rownames(trait.matrix) <- paste(rep(traitnames,each=n.perm),1:n.perm,sep="_")
+
+                           if(small){
+                               trait.matrix <- trait.matrix[1:n.perm,]
+                           }
+
+                           eff.out <- matrix(NA,nrow(trait.matrix),nrow(strain.map))
+                           pval.out <- matrix(NA,nrow(trait.matrix),nrow(strain.map))
+                          
+                           for(i in 1:nrow(strain.map)){
+                               noseg <- length(unique(strain.map[i,])) ==1
+                               if(noseg){
+                                   output.tmp <- matrix(c(0,0),byrow=TRUE,ncol=2,nrow=nrow(trait.matrix))
+                               }
+                               if( i == 1 & !noseg){
+                                   if(!NAs){output.tmp <- fast.lm.1(trait.matrix,strain.map[i,])}
+                                   if(NAs){output.tmp <- lm.1(trait.matrix,strain.map[i,])}
+                               }
+                               if( i != 1 & sum(abs(strain.map[i-1,]-strain.map[i,]),na.rm=T) != 0 & !noseg){
+                                   if(!NAs){output.tmp <- fast.lm.1(trait.matrix,strain.map[i,])}
+                                   if(NAs){output.tmp <- lm.1(trait.matrix,strain.map[i,])}
+                               }
+                               if( i != 1 & sum(abs(strain.map[i-1,]-strain.map[i,]),na.rm=T) == 0 & !noseg){
+                                   output.tmp <- output.tmp
+                               }
+                               eff.out[,i] <- output.tmp[,2]
+                               pval.out[,i] <- output.tmp[,1]
+                           }
+
+                       colnames(eff.out) <- rownames(strain.marker)
+                       rownames(eff.out) <- rownames(trait.matrix)
+                       colnames(pval.out) <- rownames(strain.marker)
+                       rownames(pval.out) <- rownames(trait.matrix)
+
+
+                       output <- NULL; output <- as.list(output)
+                       output[[1]] <- round(pval.out,digits=2)
+                       output[[2]] <- round(eff.out,digits=3)
+                       output[[3]] <- trait.matrix
+                       output[[4]] <- strain.map
+                       output[[5]] <- strain.marker
+                       names(output) <- c("LOD","Effect","Trait_perm","Map","Marker")
+                       
+                       return(output)                           
+                      }
diff --git a/Function_scripts/QTL.map.1.sim.R b/Function_scripts/QTL.map.1.sim.R
new file mode 100644
index 0000000..700be0d
--- /dev/null
+++ b/Function_scripts/QTL.map.1.sim.R
@@ -0,0 +1,124 @@
+###Conduct mapping simulation to investigate power
+#Sterken, 2017
+
+###input
+#strain.map (markers in rows, genotypes in columns)
+#strain.marker (markers in rows, with columns name, chromosome and position)
+#n_per_marker (number of simulated QTL per marker)
+#sigma_peak (standard deviation of QTL peak; standard normal)
+#sigma_error (standard deviation of noise; standard normal)
+
+###output
+#a dataframe with per rows the outcomes of the simulated peak-sizes. The number of true, false, and undetected
+#  QTL; quantiles of the QTL effect-size estimation; quantiles of the QTL location estimation
+
+###Description
+# Noise based on standard normal
+# Fractional genotypes are transformed to integer numbers using round for simulating QTL
+# Heterozygousity may be simulated using '0' or NA; these are ignored in assigning QTL effects but instead are given
+#  an effect based on a uniform distribution.
+
+###Updates
+# QTL detection was very strict (peak at location of peak, changed that to a peak within a percentage of the genetic map (2.5%))
+
+
+QTL.map.1.sim <- function(strain.map,strain.marker,n_per_marker,sigma_peak,sigma_error,threshold,peak_within_perc){
+                            if(missing(strain.map)){                                        stop("specify strain.map (genetic map)")}
+                            if(missing(strain.marker)){                                     strain.marker <- data.frame(cbind(name=1:dim(strain.map)[1],chromosome=NA,bp=1:dim(strain.map)[1]))
+                                                                                            rownames(strain.marker) <- 1:dim(strain.map)[1]}
+                            if(missing(n_per_marker)){                                      n_per_marker <-10}
+                            if(missing(sigma_peak)){                                        sigma_peak <- c(1,1.15,1.3,1.47,1.64,1.82,2,2.2,2.47,2.73,3.05,3.45,4)}
+                            if(missing(sigma_error)){                                       sigma_error <- 1}
+                            if(missing(threshold)){                                         threshold <- c(3.5)}
+                            if(missing(peak_within_perc)){                                  peak_within_perc = 0.0125}
+                            ###1 sigma difference on 1 sigma noise is 20% of variance
+                            ###2 sigma difference on 1 sigma noise is 50% of variance
+                            ###3 sigma difference on 1 sigma noise is 69% of variance
+                            ###4 sigma difference on 1 sigma noise is 80% of variance; summary(lm(c(rnorm(10000,0,1),rnorm(10000,4,1))~rep(c(-1,1),each=10000)))
+                            ###cbind(var_expl=seq(5,95,by=5),sigma=c(0.46,0.67,0.84,1,1.15,1.3,1.47,1.64,1.82,2,2.2,2.47,2.73,3.05,3.45,4,4.75,6.0,8.75))
+    
+                            which.max.rev <- function(x){length(x)-which.max(rev(x))+1}
+                            
+                            out <- NULL
+                            for(i in sigma_peak){
+                                ###Generate peaked data with random error
+                                    sim.map.file <- QTL.sim.data.per.marker(strain.map,strain.marker,n_per_marker,sigma_peak=i,sigma_error)
+                                    sim.emp.file <- QTL.sim.data.per.marker(strain.map,strain.marker,n_per_marker,sigma_peak=0,sigma_error)
+    
+                                ###Map it
+                                    sim.map.file <- QTL.map.1(sim.map.file[[1]],sim.map.file[[2]],sim.map.file[[3]])
+                                    
+                                    ###locate peak
+                                    loc_peak1 <- as.numeric(as.character(unlist(apply(sim.map.file$LOD,1,which.max))))
+                                    loc_peak2 <- as.numeric(as.character(unlist(apply(sim.map.file$LOD,1,which.max.rev))))                                    
+                                    loc_peak <- floor(apply(cbind(loc_peak1,loc_peak2),1,median))
+                                    
+                                    ###effects and lod
+                                    lod_peak <- as.numeric(t(sim.map.file$LOD))[loc_peak+seq(0,((nrow(sim.map.file$Map)^2)*n_per_marker)-nrow(sim.map.file$Map),by=nrow(sim.map.file$Map))]
+                                    eff_peak <- as.numeric(t(sim.map.file$Effect))[loc_peak+seq(0,((nrow(sim.map.file$Map)^2)*n_per_marker)-nrow(sim.map.file$Map),by=nrow(sim.map.file$Map))]
+                                                                        
+                                    ###effects and location of simulated peak
+                                    loc_sim <- rep(1:nrow(sim.map.file$Map),each=n_per_marker)
+
+                                    ###peak is true if: above threshold and within % of total genetic map
+                                    ###peak is false if: above threshold and outside % of total genetic map
+                                    ###peak is not detected if: no significant peak
+                                    out.sim <- NULL
+                                        out.sim <- c(out.sim,detected_true=sum(lod_peak>=threshold & abs(loc_peak-loc_sim) < floor(nrow(sim.map.file$Map)*peak_within_perc)))
+                                        out.sim <- c(out.sim,detected_false=sum(lod_peak>=threshold & abs(loc_peak-loc_sim) >= floor(nrow(sim.map.file$Map)*peak_within_perc)))
+                                        out.sim <- c(out.sim,detected_not = sum(lod_peak<threshold))
+                                            selc <- lod_peak>=threshold & abs(loc_peak-loc_sim) < floor(nrow(sim.map.file$Map)*peak_within_perc)
+                                        out.sim <- c(out.sim,detected_true_eff_est=quantile(abs(eff_peak[selc])/(i/2),probs=c(0.05,0.25,0.5,0.75,0.95)))
+                                        out.sim <- c(out.sim,detected_true_loc_est=quantile(abs(as.numeric(as.character(unlist(sim.map.file$Marker[loc_sim,3])))-as.numeric(as.character(unlist(sim.map.file$Marker[loc_peak+ceiling((loc_peak2-loc_peak)/2),3]))))[selc],probs=c(0.05,0.25,0.5,0.75,0.95)))
+    
+                                ###Map False file
+                                    sim.map.file <- QTL.map.1(sim.emp.file[[1]],sim.emp.file[[2]],sim.emp.file[[3]])
+    
+                                    loc_peak <- as.numeric(as.character(unlist(apply(sim.map.file$LOD,1,which.max))))
+                                    lod_peak <- as.numeric(as.character(unlist(apply(sim.map.file$LOD,1,max,na.rm=TRUE))))
+                                    eff_peak <- t(sim.map.file$Effect)[(ncol(sim.map.file$Effect)*(0:(nrow(sim.map.file$Effect)-1)))+loc_peak]
+    
+                                    loc_sim <- rep(1:nrow(sim.map.file$Map),each=n_per_marker)
+
+                                    out.emp <- NULL
+                                        out.emp <- c(out.emp,detected_true=0)
+                                        out.emp <- c(out.emp,detected_false=sum(lod_peak >= threshold))
+                                        out.emp <- c(out.emp,detected_not = sum(lod_peak < threshold))
+    
+                                        out.emp <- c(out.emp,detected_true_eff_est=quantile(rep(NA,times=100),na.rm=T,probs=c(0.05,0.25,0.5,0.75,0.95)))
+                                        out.emp <- c(out.emp,detected_true_loc_est=quantile(rep(NA,times=100),na.rm=T,probs=c(0.05,0.25,0.5,0.75,0.95)))
+                                ###Combine and summarize
+                                    out.sim <- data.frame(cbind(name=c(paste("peak_sim_",i,sep=""),paste("noise_sim_",sigma_error,sep="")),
+                                                                var_explained=c(round(summary(lm(c(rnorm(100000,0,sigma_error),rnorm(100000,i,sigma_error))~rep(c(-1,1),each=100000)))$adj.r.squared,digits=2),round(summary(lm(c(rnorm(100000,0,sigma_error),rnorm(100000,0,sigma_error))~rep(c(-1,1),each=100000)))$adj.r.squared,digits=2)),
+                                                                rbind(out.sim,out.emp)))
+    
+                                    out <- rbind(out,out.sim)
+                            }
+                            
+                            ###Make sure numbers are numeric and characters are characteristic                            
+                            for(i in 2:ncol(out)){
+                                out[,i] <- as.numeric(as.character(unlist(out[,i])))
+                            }
+                            out[,1] <- as.character(unlist(out[,1]))
+
+
+                            ###summarize all noise simulations
+                            out[max(grep("noise_sim",out[,1])),-1] <- apply(out[grep("noise_sim",out[,1]),-1],2,sum)
+                            out <- out[c(grep("peak_sim",out[,1]),max(grep("noise_sim",out[,1]))),]
+                            
+                            ##Add percentage detected & percentage false
+                            output <- cbind(out,
+                                            percentage_detected=round(out$detected_true/(out$detected_true+out$detected_false+out$detected_not),digits=3)*100,
+                                            percentage_false=round(out$detected_false/(out$detected_true+out$detected_false+out$detected_not),digits=3)*100,
+                                            type=unlist(strsplit(out$name,split="_"))[seq(1,nrow(out)*3,by=3)],
+                                            sigma=unlist(strsplit(out$name,split="_"))[seq(3,nrow(out)*3,by=3)])
+                                                            
+                            output <- output[,c(18,19,2,3:5,16,17,6:15)]
+                            colnames(output)[grepl("_est.",colnames(output))] <- paste("quantile_",unlist(strsplit(colnames(output)[grepl("_est.",colnames(output))],split="_est."))[seq(2,2*sum(grepl("_est.",colnames(output))),by=2)],sep="")
+                            colnames(output) <- gsub("\\.","",colnames(output))
+                            
+                            colnames(output) <- paste(c(rep("Simulation",times=3),rep("QTL_detection",times=5),rep("Effect_size_estimation_(quantiles)",times=5),rep("QTL-true_peak_location_distance_(quantiles)",times=5)),colnames(output),sep=".")
+                            
+
+                            return(output)
+}
diff --git a/Function_scripts/QTL.map1.dataframe.R b/Function_scripts/QTL.map1.dataframe.R
new file mode 100644
index 0000000..eec8c50
--- /dev/null
+++ b/Function_scripts/QTL.map1.dataframe.R
@@ -0,0 +1,58 @@
+###function for transforming output of QTL.map.1 to a dataframe
+
+###input
+#QTL.map.1 output
+#small (logic) if a single trait was mapped set to TRUE
+
+###output
+#dataframe with the columns trait, qtl_chromosome, qtl_bp, qtl_marker, qtl_significance, and qtl_effect
+
+###Description
+#This transforms the relevant output to a dataframe, making handling in dplyr and plotting using ggplot easier
+#This takes output of map.1 functions (list file with LOD, effect, Trait, and Map and converts it to a list.
+#Optimized (a bit) by generating an empty matrix and filling the columns one-by-one.
+#added an option for single traits
+
+    QTL.map1.dataframe <- function(map1.output,small){
+                                if(missing(map1.output)){        stop("Missing map1.output ([[LOD]],[[Effect]],[[Trait]],[[Map]],[[Marker]])")}
+                                if(missing(small)){              small <- FALSE}
+      
+                                if(is.null(dim(map1.output[[1]]))){
+                                    small <- TRUE
+                                }
+ 
+                                if(small){
+                                    ###Split up to long format
+                                    output <- matrix(NA,length(map1.output[[1]]),6)
+                                    output[,1] <- rep(rownames(map1.output[[3]])[1],each=length(map1.output[[1]]))
+                                    output[,2] <- as.character(unlist(map1.output[[5]][,2]))
+                                    output[,3] <- as.numeric(as.character(unlist(map1.output[[5]][,3])))
+                                    output[,4] <- c(1:length(map1.output[[1]]))
+                                    output[,5] <- c(map1.output[[1]])
+                                    output[,6] <- c(map1.output[[2]])
+                                    output <- as.data.frame(output)
+                                }     
+                                if(!small){
+                                    ###Split up to long format
+                                    output <- matrix(NA,ncol(map1.output[[1]])*nrow(map1.output[[1]]),6)
+                                    output[,1] <- rep(rownames(map1.output[[1]]),each=ncol(map1.output[[1]]))
+                                    output[,2] <- rep(as.character(unlist(map1.output[[5]][,2])),times=nrow(map1.output[[1]]))
+                                    output[,3] <- rep(as.numeric(as.character(unlist(map1.output[[5]][,3]))),times=nrow(map1.output[[1]]))
+                                    output[,4] <- rep(c(1:ncol(map1.output[[1]])),times=nrow(map1.output[[1]]))
+                                    output[,5] <- c(t(map1.output[[1]]))
+                                    output[,6] <- c(t(map1.output[[2]]))
+                                    output <- as.data.frame(output)
+                                }
+
+                                ###overwrite dataframe formats
+                                output[,1] <- as.character(unlist(output[,1]))
+                                output[,2] <- as.character(unlist(output[,2]))
+                                output[,3] <- as.numeric(as.character(unlist(output[,3])))
+                                output[,4] <- as.numeric(as.character(unlist(output[,4])))
+                                output[,5] <- as.numeric(as.character(unlist(output[,5])))
+                                output[,6] <- as.numeric(as.character(unlist(output[,6])))
+                                colnames(output) <- c("trait","qtl_chromosome","qtl_bp","qtl_marker","qtl_significance","qtl_effect")
+
+                                ###Return
+                                return(output)
+                               }
diff --git a/Function_scripts/QTL.peak.finder.R b/Function_scripts/QTL.peak.finder.R
new file mode 100644
index 0000000..9011547
--- /dev/null
+++ b/Function_scripts/QTL.peak.finder.R
@@ -0,0 +1,60 @@
+###function for single marker mapping
+#Snoek & Sterken, 2017; Sterken, 2017
+
+###input
+#map1.dataframe; output of the QTL.map1.dataframe function
+#threshold; the significance treshold for calling QTL (in -log10(p))
+#LOD.drop; the decrease in LOD-score compared to the peak to call the boundaries of the QTL. Standard is 1.5
+
+###output
+#the original dataframe with the added columns: qtl_peak, qtl_marker_left, qtl_bp_left, qtl_marker_right, qtl_bp_right
+#these columns are only non-NA where a QTL peak is located.
+
+###Description
+#This takes output of mapping.to.list function and identiefies peaks and confidence intervals v1.3
+#Based on a given threshold and a 1.5 LOD-drop.
+#v1.2 made the function more efficient, the peak borders are selected in one processing
+#v1.3 fixed the problem calling the peak if the peak borders the edge of the chromosome
+#v1.4 fixed 'larger then, smaller then' bug
+#v1.5 fixed 'incompatible types' bug; NA is not recognized as numeric
+#v1.6 rewritten the code, so it is fully in dplyr framework
+
+QTL.peak.finder <- function(map1.dataframe,threshold,LOD.drop){
+                            if(missing(map1.dataframe)){            stop("Missing map1.dataframe (trait, qtl_chromosome, qtl_bp, qtl_marker, qtl_significance, qtl_effect)")}
+                            if(missing(threshold)){            stop("Missing threshold")}
+                            if(missing(LOD.drop)){             LOD.drop <- 1.5}
+
+                            ###locate the peak
+                            output <- dplyr::group_by(map1.dataframe,trait,qtl_chromosome) %>%
+                                      dplyr::mutate(qtl_peak_tmp=ifelse(qtl_significance == max(qtl_significance) & max(qtl_significance)>=threshold,1,NA)) %>%
+                                      dplyr::group_by(trait,qtl_chromosome,qtl_peak_tmp) %>%
+                                      dplyr::mutate(qtl_peak=ifelse(is.na(qtl_peak_tmp),NA,
+                                                               ifelse(abs(qtl_marker - round(mean(qtl_marker),digits = 0)) == min(abs(qtl_marker - round(mean(qtl_marker),digits = 0))),qtl_marker,NA))) %>%
+                                      dplyr::group_by(trait,qtl_chromosome) %>%
+                                      dplyr::mutate(Border=ifelse((qtl_significance < max(qtl_significance)-LOD.drop & max(qtl_significance)>=threshold),T,
+                                                             ifelse((qtl_bp == min(qtl_bp) & min(qtl_bp) %in% qtl_bp[qtl_significance >= max(qtl_significance)-LOD.drop & max(qtl_significance)>=threshold]),T,
+                                                               ifelse((qtl_bp == max(qtl_bp) & max(qtl_bp) %in% qtl_bp[qtl_significance >= max(qtl_significance)-LOD.drop & max(qtl_significance)>=threshold]),T,F))),
+                                                    peakloc=ifelse(sum(!is.na(qtl_peak)) == 1,qtl_peak[!is.na(qtl_peak)],NA)) %>%
+                                      dplyr::group_by(trait,qtl_chromosome,Border) %>%
+                                      dplyr::mutate(qtl_marker_left=as.numeric(ifelse(Border,ifelse(qtl_marker == max(qtl_marker[qtl_marker <= peakloc]),qtl_marker,NA),NA)),
+                                                    qtl_bp_left=as.numeric(ifelse(Border,ifelse(qtl_bp == max(qtl_bp[qtl_marker <= peakloc]),qtl_bp,NA),NA)),
+                                                    qtl_marker_right=as.numeric(ifelse(Border,ifelse(qtl_marker == min(qtl_marker[qtl_marker >= peakloc]),qtl_marker,NA),NA)),
+                                                    qtl_bp_right=as.numeric(ifelse(Border,ifelse(qtl_bp == min(qtl_bp[qtl_marker >= peakloc]),qtl_bp,NA),NA))) %>% 
+                                      dplyr::group_by(trait,qtl_chromosome) %>%
+                                      dplyr::mutate(qtl_peak_tmp=ifelse(sum(!is.na(qtl_peak))==1,1,0)) %>%
+                                      dplyr::group_by(trait,qtl_chromosome,qtl_peak_tmp) %>%
+                                      dplyr::mutate(qtl_marker_left=ifelse(qtl_peak_tmp==1,qtl_marker_left[!is.na(qtl_marker_left)],NA),
+                                                    qtl_bp_left=ifelse(qtl_peak_tmp==1,qtl_bp_left[!is.na(qtl_bp_left)],NA),
+                                                    qtl_marker_right=ifelse(qtl_peak_tmp==1,qtl_marker_right[!is.na(qtl_marker_right)],NA),
+                                                    qtl_bp_right=ifelse(qtl_peak_tmp==1,qtl_bp_right[!is.na(qtl_bp_right)],NA)) %>%
+                                      dplyr::ungroup() %>%
+                                      dplyr::select(trait,qtl_chromosome,qtl_bp,qtl_marker,qtl_significance,qtl_effect,qtl_peak,qtl_marker_left,qtl_bp_left,qtl_marker_right,qtl_bp_right) %>%
+                                      dplyr::mutate(qtl_marker_left=ifelse(!is.na(qtl_peak),qtl_marker_left,NA),
+                                                    qtl_bp_left=ifelse(!is.na(qtl_peak),qtl_bp_left,NA),
+                                                    qtl_marker_right=ifelse(!is.na(qtl_peak),qtl_marker_right,NA),
+                                                    qtl_bp_right=ifelse(!is.na(qtl_peak),qtl_bp_right,NA))
+                        
+                            ###return called peaks
+                            return(output)
+                           }
+
diff --git a/Function_scripts/QTL.sim.data.per.marker.R b/Function_scripts/QTL.sim.data.per.marker.R
new file mode 100644
index 0000000..1f6bc50
--- /dev/null
+++ b/Function_scripts/QTL.sim.data.per.marker.R
@@ -0,0 +1,67 @@
+###Simulate simple additive QTL
+#Sterken, 2017
+
+###input
+#strain.map (markers in rows, genotypes in columns)
+#strain.marker (markers in rows, with columns name, chromosome and position)
+#n_per_marker (number of simulated QTL per marker)
+#sigma_peak (standard deviation of QTL peak; standard normal)
+#sigma_error (standard deviation of noise; standard normal)
+
+###output
+#a list with: Simulated traits ([[1]]), Map ([[2]]), and Marker ([[3]]).
+
+###Description
+# Noise based on standard normal
+# Fractional genotypes are transformed to integer numbers using round for simulating QTL
+# Heterozygousity may be simulated using '0' or NA; these are ignored in assigning QTL effects but instead are given
+#  an effect based on a uniform distribution.
+
+
+QTL.sim.data.per.marker <- function(strain.map,strain.marker,n_per_marker,sigma_peak,sigma_error,replicates){
+                                    if(missing(replicates)){                               replicates <- 1}
+  
+                                    if(sum(strain.map%%1>0,na.rm=T)>0){
+                                          ###fractional loci are treated as closest genotype
+                                          strain.map.use <- round(strain.map,digits = 0)
+                                          warning("set fractional genotypes to closest genotype using round() for simulating QTL")
+                                      } else {
+                                          strain.map.use <- strain.map
+                                      }
+    
+                                      sim.map.file <- list()
+                                      
+                                      sim.map.file[[2]] <- data.matrix(strain.map.use)
+                                          types <- unique(as.numeric(as.character(unlist(sim.map.file[[2]]))))
+                                          types <- types[!is.na(types)]
+                                          if(length(types) > 2){
+                                              types <- sort(types)[c(1,length(types))]
+                                              warning(paste("more than 2 genotypes detected, only simulating with the most extreme numbers:",paste(types,collapse=" "),sep=" "))
+                                          }
+                                          sim.map.file[[2]] <- sim.map.file[[2]][rep(1:nrow(sim.map.file[[2]]),each=n_per_marker),]
+                                      
+                                      for(i in 1:replicates){
+                                          tmp <- sim.map.file[[2]]
+                                              tmp[sim.map.file[[2]]==types[1]] <- sigma_peak
+                                              tmp[sim.map.file[[2]]==types[2]] <- 0
+                                              ###markers of unknown genotype get filled with a random number
+                                              tmp[!sim.map.file[[2]]%in%types] <- runif(sum(!sim.map.file[[2]]%in%types),0,sigma_peak)
+                                              tmp <- tmp + rnorm(n=length(tmp),mean=0,sd=sigma_error)
+                                              rownames(tmp) <- paste(rownames(tmp),rep(1:n_per_marker,times=nrow(sim.map.file[[2]])/n_per_marker),sep="_")
+                                      
+                                          if(i == 1){
+                                              sim.map.file[[1]] <- tmp
+                                          } else {
+                                              sim.map.file[[1]] <- tmp + sim.map.file[[1]]
+                                          }
+                                      }
+                                      ###average
+                                          sim.map.file[[1]] <- sim.map.file[[1]] / replicates
+                                          
+                                      ###paste original strain map back
+                                      sim.map.file[[2]] <- strain.map
+                                      
+                                      sim.map.file[[3]] <- strain.marker
+                                      
+                                      return(sim.map.file)
+                                     }    
diff --git a/Function_scripts/QTL.table.addR2.R b/Function_scripts/QTL.table.addR2.R
new file mode 100644
index 0000000..4a35038
--- /dev/null
+++ b/Function_scripts/QTL.table.addR2.R
@@ -0,0 +1,54 @@
+###function for determining variance explained by QTL
+
+###input
+#QTL.table.file (should contain marker locations: qtl_marker, qtl_marker_1, qtl_marker_2 and trait names)
+#QTL.list.file (list with QTL data, should contain entries Map and Trait)
+
+###output
+#the R-squared is added to the QTL.table.file
+
+###Description
+
+# This function will add the sum of squares of the used model to a QTL table output, it also calculates the r-squared of the model
+
+
+    QTL.table.addR2 <- function(QTL.table.file,QTL.list.file){
+                                 if(missing(QTL.table.file)){                          stop("Give a QTL summary table, should contain marker locations (qtl_marker, qtl_marker_1, qtl_marker_2) and trait names")}
+                                 if(missing(QTL.list.file)){                           stop("Give the input on which was mapped (output of QTL.data.prep)")}
+
+                                 ###R-squared function
+                                 R.sq <- function(x,y){return(cor(x,y,use="na.or.complete")^2)}
+                                 
+                                 ###True if single marker mapping
+                                 if("qtl_marker" %in% colnames(QTL.table.file)){
+                                     output <- NULL
+                                     for(i in 1:nrow(QTL.table.file)){
+                                         ###get marker
+                                         mrk <- QTL.list.file$Map[as.numeric(as.character(unlist(QTL.table.file$qtl_marker[i]))),]
+                                         ###get expression
+                                         y <- unlist(QTL.list.file$Trait[rownames(QTL.list.file$Trait)==as.character(unlist(QTL.table.file$trait[i])),])
+                                         ###run model
+                                         output <- c(output,R.sq(y,mrk))
+                                     }
+                                     output <- data.frame(cbind(QTL.table.file,qtl_R2_sm=output))
+                                 }
+
+                                 ###True if multiple marker mapping
+                                 if("marker_1" %in% colnames(QTL.table.file) & "marker_2" %in% colnames(QTL.table.file)){
+                                     output <- cbind(QTL.table.file,SS_int_mrk1=NA,SS_int_mrk2=NA,SS_int=NA,SS_int_res=NA,SS_add_mrk1=NA,SS_add_mrk2=NA,SS_add_res=NA)
+                                     for(i in 1:nrow(QTL.table.file)){
+                                         ###get marker
+                                         mrk1 <- QTL.list.file$Map[QTL.table.file$marker_1[i],]
+                                         mrk2 <- QTL.list.file$Map[QTL.table.file$marker_2[i],]
+                                         ###get expression
+                                         y <- unlist(QTL.list.file$Trait[rownames(QTL.list.file$Trait)==QTL.table.file$trait[i],])
+                                         ###run model
+                                         output[i,colnames(output)%in%c("SS_int_mrk1","SS_int_mrk2","SS_int","SS_int_res")] <- anova(lm(y ~ mrk1*mrk2))$"Sum Sq"
+                                         output[i,colnames(output)%in%c("SS_add_mrk1","SS_add_mrk2","SS_add_res")] <- anova(lm(y ~ mrk1+mrk2))$"Sum Sq"
+
+                                     }
+                                     output <- dplyr::mutate(output,r2_add=(SS_add_mrk1+SS_add_mrk2)/(SS_add_mrk1+SS_add_mrk2+SS_add_res),r2_int=SS_int/(SS_int_mrk1+SS_int_mrk2+SS_int+SS_int_res))
+                                 }
+                                 return(output)
+                                }
+
diff --git a/Function_scripts/R.sq.R b/Function_scripts/R.sq.R
new file mode 100644
index 0000000..1a2d822
--- /dev/null
+++ b/Function_scripts/R.sq.R
@@ -0,0 +1,15 @@
+###calculates R-squared as in a linear model
+
+###input
+#x (vector)
+#y (vector)
+
+
+###output
+#squared correlation coeffcient
+
+###Description
+
+    R.sq <- function(x,y){return(cor(x,y,use="na.or.complete")^2)}
+    
+      
\ No newline at end of file
diff --git a/Function_scripts/enrich.easy.R b/Function_scripts/enrich.easy.R
new file mode 100644
index 0000000..6e06882
--- /dev/null
+++ b/Function_scripts/enrich.easy.R
@@ -0,0 +1,52 @@
+###main enrichment function
+
+###input
+#gene_list (list of genes: WBID, public names, etc.)
+#gene_group (group genes belong to; if not defined assumed to be a single group)
+#gene_total (all genes detected with platform (e.g. Microarray))
+#database_dir (location of Enrich.DB.Rdata file)
+#group_filter_n (minimum size of group; standard is 3)
+#overlap_filter_n (minimum overlap; standard is 2)
+
+###output
+#dataframe with enrichment,  columns: Term, Annotation, Group, Genes_in_group, Overlap_expected, Overlap, Significance, Bonferroni
+
+###Description
+# Main enrichment function, calls on enrich.one, enrich.get.geneIDs, and enrich.more
+
+
+enrich.easy <- function(gene_list,gene_group,gene_total,database_dir,group_filter_n = 3,overlap_filter_n = 2){
+                       if(missing(gene_list)){                                stop("provide vector of genes to be enriched")}
+                       if(missing(gene_group)){                               gene_group <- rep("A",times=length(gene_list));
+                                                                              warning("genes assumed to belong to a single group")}
+                       if(missing(gene_total)){                               stop("provide vector of total set of genes (e.g. present on MA)")}
+                           ###Check input
+                           if(length(gene_list) != length(gene_group)){       stop("gene_list and gene_group should match in length")}
+                       ###Database loading
+                       if(missing(database_dir)){                             stop("Give location of enrichment database (or built it using 'built.enrich.DB'")}
+                           load(paste(database_dir,"/obj_Enrich.DB.Rdata",sep=""))
+                           load(paste(database_dir,"/obj_gene.identifiers.Rdata",sep=""))
+
+                       ###Process input
+                           gene_list <- enrich.get.geneIDs(gene_list,gene.identifiers)
+                           gene_total <- enrich.get.geneIDs(unique(gene_total),gene.identifiers)
+
+                           tmp <- tapply(gene_list,gene_group,enrich.more,gene_total=gene_total,Enrich.DB=Enrich.DB)
+                           output <- NULL
+                           for(i in 1:length(tmp)){
+                               output <- rbind(output,tmp[[i]])
+                           }
+                           output <- cbind(Term=rep(names(tmp),lapply(tmp,nrow)),output)
+                               ###Filter on low amounts
+                               output <- output[output[,4]> group_filter_n & output[,6]>overlap_filter_n,]
+                               if(length(output) == 0){                       stop("no significant enrichments")}
+
+                               ###Add bonferroni and category names
+                               output <- cbind(output,Bonferroni=p.adjust(output[,7],method="bonferroni"),Annotation=Enrich.DB[[1]][match(output[,2],Enrich.DB[[1]][,1]),2])
+                               output <- output[,c(1,9,3,4,5,6,7,8)]
+                               colnames(output) <- c("Term","Annotation","Group","Genes_in_group","Overlap_expected","Overlap","Significance","Bonferroni")
+                              
+                               output <- dplyr::mutate(output,Fold_enrichment=log2(Overlap/Overlap_expected)) %>%
+                                         dplyr::select(Term,Annotation,Group,Genes_in_group,Overlap_expected,Overlap,Fold_enrichment,Significance,Bonferroni)
+                       return(output)
+                      }
\ No newline at end of file
diff --git a/Function_scripts/enrich.get.geneIDs.R b/Function_scripts/enrich.get.geneIDs.R
new file mode 100644
index 0000000..6fcd197
--- /dev/null
+++ b/Function_scripts/enrich.get.geneIDs.R
@@ -0,0 +1,32 @@
+###recodes a set of genes into unique IDs
+
+###input
+#gene_list (list of genes (e.g. WBID))
+#gene_identifiers (dataframe with various identifiers per gene)
+
+###output
+#vector with uniform identifiers
+
+###Description
+# Basic enrichment function, normally not used directly.
+
+
+enrich.get.geneIDs <- function(gene_list,gene_identifiers){
+                              if(missing(gene_list)){                         stop("Gene list (e.g. wormbase IDs)")}
+                              if(missing(gene_identifiers)){                  stop("Provide a dataframe with gene identifiers (ID, public name, sequence name, transcript name")}
+
+                              ###Remove empty rows
+                                  gene_list[gene_list == "" | gene_list == "NA" | gene_list == "N/A"] <- NA
+
+                              tmp <- matrix(c(match(gene_list,gene_identifiers[,1]),match(gene_list,gene_identifiers[,2]),match(gene_list,gene_identifiers[,3]),match(gene_list,gene_identifiers[,4])),ncol=4)
+                                  NAs <- which(apply(is.na(tmp),1,sum)==4)
+                                  if(length(NAs) > 0){
+                                      tmp[NAs,] <- 1
+                                  }
+                              output <- gene_identifiers[apply(tmp,1,function(x){unique(x[!is.na(x)])}),1]
+                                  if(length(NAs) > 0){
+                                      output[NAs] <- "none"
+                                      warning(paste("there are",length(NAs),"non-matching identifiers in your query"))
+                                  }
+                              return(output)
+                             }
diff --git a/Function_scripts/enrich.more.R b/Function_scripts/enrich.more.R
new file mode 100644
index 0000000..f671a47
--- /dev/null
+++ b/Function_scripts/enrich.more.R
@@ -0,0 +1,28 @@
+###enrichment for all sets in database
+
+###input
+#gene_list (list of genes in ID format (e.g. WBID))
+#gene_total (all genes detected with platform (e.g. Microarray))
+#anno_sets (annotation sets to be enriched for)
+
+###output
+#dataframe with enrichment,  columns: Anno_type, Group, Genes_in_group, Overlap_expected, Overlap, Significance 
+
+###Description
+# Basic enrichment function, normally not used directly.
+# Calls enrich.one for doing the enrichment per group
+
+enrich.more <- function(gene_list,gene_total,anno_sets,Enrich.DB){
+                       if(missing(gene_list)){                                stop("genes, ID format")}
+                       if(missing(gene_total)){                               stop("total set of genes (e.g. present on MA), ID format")}
+                       if(missing(anno_sets)){                                anno_sets <- Enrich.DB[[1]][-1,1]}
+
+                       output <- NULL
+                       for(i in 1:length(anno_sets)){
+                           anno_group <- Enrich.DB[[which(Enrich.DB[[1]][,1] == anno_sets[i])]]
+                           output <- rbind(output,cbind(anno_sets[i],enrich.one(unique(gene_list),unique(gene_total),anno_group)))
+                       }
+                       colnames(output) <- c("Anno_Type","Group","Genes_in_group","Overlap_expected","Overlap","Significance")
+
+                       return(output)
+                      }
\ No newline at end of file
diff --git a/Function_scripts/enrich.one.R b/Function_scripts/enrich.one.R
new file mode 100644
index 0000000..3cc101a
--- /dev/null
+++ b/Function_scripts/enrich.one.R
@@ -0,0 +1,35 @@
+###enrichment for 1 set
+
+###input
+#gene_list (list of genes in ID format (e.g. WBID))
+#gene_total (all genes detected with platform (e.g. Microarray))
+#anno_group (annotation-group to be enriched for)
+
+###output
+#dataframe with enrichment,  columns: Group, Genes_in_group, Overlap_expected, Overlap, Significance 
+
+###Description
+# Basic enrichment function, normally not used directly.
+
+enrich.one <- function(gene_list,gene_total,anno_group){
+                        if(missing(gene_list)){                                stop("genes, ID format")}
+                        if(missing(gene_total)){                               stop("total set of genes (e.g. present on MA), ID format")}
+                        if(missing(anno_group)){                               stop("Give an annotation-group file")}
+
+                        ###make absolutely sure that we work with unique genes
+                        anno_group[[1]] <- anno_group[[1]][!duplicated(paste(as.character(unlist(anno_group[[1]][,1])),as.character(unlist(anno_group[[1]][,2])))),]
+                        anno_group[[1]][,2] <- as.factor(anno_group[[1]][,2])
+                        gene_list <- unique(gene_list)
+                        gene_total <- unique(gene_total)
+
+                        wb.drawn <- table(anno_group[[1]][anno_group[[1]][,1] %in% gene_list,2])
+                        tot.wb <- table(anno_group[[1]][anno_group[[1]][,1] %in% gene_total,2])
+                        tot.bb <- length(unique(gene_total))- tot.wb
+                        tot.drawn <- length(gene_list)
+                        enr.test <- phyper(wb.drawn,tot.wb,tot.bb,tot.drawn,lower.tail=F)
+
+                        output <- data.frame(rownames(tot.wb),as.numeric(tot.wb),as.numeric(tot.wb)*(sum(gene_list %in% gene_total)/length(gene_total)),as.numeric(wb.drawn),as.numeric(enr.test))
+                        colnames(output) <- c("Group","Genes_in_group","Overlap_expected","Overlap","Significance")
+
+                        return (output)
+}
diff --git a/Function_scripts/fast.lm.1.R b/Function_scripts/fast.lm.1.R
new file mode 100644
index 0000000..df5df3c
--- /dev/null
+++ b/Function_scripts/fast.lm.1.R
@@ -0,0 +1,21 @@
+###fast lm function
+
+###input
+#traits (matrix)
+#variable (vector)
+
+###output
+#matrix with -log10 transformed p-values and effects
+
+###Description
+# Fast linear model, 1 variable
+# Uses fast.summ, can't handle NA's!
+
+    fast.lm.1 <- function(traits,variable){
+                          if(missing(traits)){        stop("specify traits")}
+                          if(missing(variable)){      stop("specify variable")}
+
+                          output <- fast.summ(lm(t(traits)~variable))
+                          return(output)
+                         }
+
diff --git a/Function_scripts/fast.summ.R b/Function_scripts/fast.summ.R
new file mode 100644
index 0000000..3fbf64d
--- /dev/null
+++ b/Function_scripts/fast.summ.R
@@ -0,0 +1,33 @@
+###fast summary function
+
+###input
+#output of lm()
+
+###output
+#matrix with -log10 transformed p-values and effects
+
+###Description
+# Fast summary function
+# This function cannot handle NA's!
+
+
+    fast.summ <- function(input){
+                          p <- input$rank
+                          rdf <- input$df.residual
+                          Qr <- input$qr
+                          n <- nrow(Qr$qr)
+
+                          p1 <- 1L:p
+                          r <- input$residuals
+                          rss <- colSums(r^2)
+
+                          resvar <- rss/rdf
+                          R <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
+                          se <- sqrt(rep(diag(R),each=length(resvar)) * rep(resvar,times=length(diag(R))))
+                          est <- input$coefficients[Qr$pivot[p1],]
+                          tval <- t(t(est)/se)
+
+                          pvals <- round(-log10(2 * pt(abs(tval),rdf, lower.tail = FALSE)),digits=2)
+                          output <- cbind(t(pvals)[,-1],t(round(est,digits=5))[,-1])
+                          return(output)
+                         }
\ No newline at end of file
diff --git a/Function_scripts/fast.summ.transcriptomics.R b/Function_scripts/fast.summ.transcriptomics.R
new file mode 100644
index 0000000..5c81e07
--- /dev/null
+++ b/Function_scripts/fast.summ.transcriptomics.R
@@ -0,0 +1,33 @@
+###fast summary function for transcriptomics
+
+###input
+#output of lm()
+
+###output
+#matrix with -log10 transformed p-values and effects
+
+###Description
+# Fast summary function
+# This function cannot handle NA's!
+
+
+fast.summ.transcriptomics <- function(input){
+                                      p <- input$rank
+                                      rdf <- input$df.residual
+                                      Qr <- input$qr
+                                      n <- nrow(Qr$qr)
+
+                                      p1 <- 1L:p
+                                      r <- input$residuals
+                                      rss <- colSums(r^2)
+
+                                      resvar <- rss/rdf
+                                      R <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
+                                      se <- sqrt(rep(diag(R),each=length(resvar)) * rep(resvar,times=length(diag(R))))
+                                      est <- input$coefficients[Qr$pivot[p1],]
+                                      tval <- t(t(est)/se)
+
+                                      pvals <- 2 * pt(abs(tval),rdf, lower.tail = FALSE)
+                                      output <- cbind(t(pvals)[,-1],t(round(est,digits=5))[,-1])
+                                      return(output)
+                                     }
\ No newline at end of file
diff --git a/Function_scripts/genetic.df.to.mat.R b/Function_scripts/genetic.df.to.mat.R
new file mode 100644
index 0000000..46fd1ee
--- /dev/null
+++ b/Function_scripts/genetic.df.to.mat.R
@@ -0,0 +1,67 @@
+###Transform a dataframe genetic map format to a matrix
+#Mark Sterken, 26/7/2020
+
+###input
+#genotype.dataframe (dataframe; organized by strain, chromosome, position_start, position_end, genotype)
+#chromosomes (chromosome names of organism; standard is C. elegans)
+#chromosome_size (sizes of chromoosmes; standard is C. elegans)
+#marker_spacing (number of markers to generate; standars is one marker per 10kb)
+
+###output
+#a list with [[1]] a matrix with the genetic map and [[2]] a dataframe with the information on the markers
+
+###Description
+#translates a long-format genetic map to a wide-format genetic map useful for further analyses.
+
+###See also
+
+genetic.df.to.mat <- function(genotype.dataframe,chromosomes,chromosome_size,marker_spacing){
+                             if(missing(genotype.dataframe)){                     stop("requires a dataframe of crossover coordinates: strain, chromosome, position_start, position_end, genotype")}
+                             if(missing(chromosomes)){                            chromosomes <- c("I","II","III","IV","V","X")}
+                             if(missing(chromosome_size)){                        chromosome_size <- c(15072434,15279421,13783801,17493829,20924180,17718942)}
+                             if(length(chromosomes) != length(chromosome_size)){  stop("chromosome names and sizes should have the same length")}
+                             if(missing(marker_spacing)){                         marker_spacing <- 10000}
+
+
+                             ###reformat colnames to lowercase
+                             colnames(genotype.dataframe) <- tolower(colnames(genotype.dataframe))
+
+                             ###Make empty coordinate system based on input parameters
+                             ###Always include base 1 and last base per chromosome
+                             tmp <- tapply(chromosome_size,chromosomes,function(x,marker_spacing){c(1,seq(marker_spacing,x,by=marker_spacing),x)},marker_spacing=marker_spacing)
+                             
+                             ###go through markers, strain by strain
+                             genotype.matrix <- NULL
+                             for(i in 1:length(unique(genotype.dataframe$strain))){
+                                 tmp.nu <- lapply(tmp,function(x){NA[!is.na(x)]})
+                                 strain.nu <- genotype.dataframe[genotype.dataframe$strain == unique(genotype.dataframe$strain)[i],]
+                                 strain.nu <- strain.nu[strain.nu$chromosome %in% chromosomes,]
+                                 for(j in 1:nrow(strain.nu)){
+                                     chr.nu <- which(names(tmp) == strain.nu$chromosome[j])
+                                     tmp.nu[[chr.nu]][tmp[[chr.nu]] %in% strain.nu$position_start[j]:strain.nu$position_end[j]] <- strain.nu$genotype[j]
+                                 }
+                                 
+                                 genotype.matrix <- cbind(genotype.matrix,unlist(tmp.nu))
+                                     colnames(genotype.matrix)[i] <- unique(genotype.dataframe$strain)[i]
+                             }
+                             rownames(genotype.matrix) <- paste(rep(names(tmp),times=unlist(lapply(tmp,length))),as.character(unlist(tmp)),sep="")
+                             
+                             informative <- genetic.informative.markers(genotype.matrix)
+                             
+                             ###Built marker file
+                             strain.marker <- data.frame(cbind(name=paste(rep(names(tmp),times=unlist(lapply(tmp,length))),as.character(unlist(tmp)),sep=""), 
+                                                               chromosome=rep(names(tmp),times=unlist(lapply(tmp,length))),
+                                                               position=unlist(tmp),
+                                                               informative_marker=as.numeric(informative)))
+                             strain.marker[,1] <- as.character(unlist(strain.marker[,1]))
+                             strain.marker[,2] <- as.character(unlist(strain.marker[,2]))
+                             strain.marker[,3] <- as.numeric(as.character(unlist(strain.marker[,3])))
+                             strain.marker[,4] <- as.numeric(as.character(unlist(strain.marker[,4])))
+                             
+                             output <- list(NULL)
+                                output[[1]] <- genotype.matrix
+                                output[[2]] <- strain.marker
+                                names(output) <- c("strain.map","strain.marker")
+                            
+                             return(output)
+                            }
\ No newline at end of file
diff --git a/Function_scripts/genetic.distance.R b/Function_scripts/genetic.distance.R
new file mode 100644
index 0000000..a0e8c18
--- /dev/null
+++ b/Function_scripts/genetic.distance.R
@@ -0,0 +1,81 @@
+###calculate genetic genetic distance between markers
+
+
+###input
+#strain.map (of the investigated strains: markers in rows, genotypes in columns)
+#strain.marker (of the invesigated strains: markers in rows, with columns name, chromosome and position)
+#chromosomes (character vector with chromosome names; standard is C. elegans)
+#chromosome_size (numeric vector with chromosome sizes; standard is C. elegans)
+#ignore.na (logic, tells if NAs should be incorporated in the analysis)
+
+
+###output
+#a dataframe with chromosome, physical position, and genetic position (cM)
+
+###Description
+#ignore.na makes the algorithm find te nearest non-NA marker
+
+
+
+
+genetic.distance <- function(strain.map,strain.marker,chromosomes,chromosome_size,ignore.na){
+                            if(missing(strain.map)){                             stop("Requires genetic map with markers per row and strains per column")}
+                            if(missing(strain.marker)){                          stop("Requires marker file with 3 columns: name, chromosome, position")}
+                            if(missing(chromosomes)){                            chromosomes <- c("I","II","III","IV","V","X")}
+                            if(missing(chromosome_size)){                        chromosome_size <- c(15072434,15279421,13783801,17493829,20924180,17718942)}
+                            if(length(chromosomes) != length(chromosome_size)){  stop("chromosome names and sizes should have the same length")}
+                            if(missing(ignore.na)){                              ignore.na <- FALSE}
+
+                            ###rolling mean function
+                            rollingmean <- function(x){out <- NULL; for(i in 2:length(x)){out <- c(out,mean(x[(i-1):i]))};return(out)}
+
+                            ###make colnames comply to format
+                            colnames(strain.marker) <- c("name", "chromosome", "position")
+
+                            ###output
+                            output <- NULL
+
+                            ###per chromosome
+                            for(i in 1:length(chromosomes)){
+                                currchr <- strain.map[strain.marker[,2]==chromosomes[i],]
+                                currmrk <- strain.marker[strain.marker[,2]==chromosomes[i],]
+                                ###fills in gaps based on the genotype of the closest non-NA marker
+                                if(!ignore.na){
+                                    for(j in 1:ncol(currchr)){
+                                        for(k in which(is.na(currchr[,j]))){
+                                            currori <- currchr[,j]
+                                            closest <- currmrk[,3]-currmrk[k,3]
+                                            closest[k] <- NA
+                                            closest <- order(abs(closest))
+                                            ii <- 1
+                                            while(is.na(currori[closest[ii]])){
+                                                ii <- ii + 1
+                                            }
+                                            currchr[k,j] <- currori[closest[ii]]
+                                        }
+                                    }
+                                }
+
+                                ###count recombinations
+                                diffmap <- apply(currchr,2,diff) != 0
+
+                                notrecomb <- apply(!diffmap & !is.na(diffmap),1,sum)
+                                yesrecomb <- apply(diffmap & !is.na(diffmap),1,sum)
+
+                                fractions <- cumsum(yesrecomb/(notrecomb+yesrecomb))
+                                fractions <- c(fractions[1],fractions,rev(fractions)[1])
+
+                                locations <- rollingmean(currmrk[,3])
+                                locations <- c(1,locations,chromosome_size[i])
+
+                                ###into output
+                                output <- rbind(output,cbind(chromosome=chromosomes[i],position=locations,cM=fractions))
+                            }
+                            output <- data.frame(output)
+                                output[,1] <- as.character(unlist(output[,1]))
+                                output[,2] <- as.numeric(as.character(unlist(output[,2])))
+                                output[,3] <- as.numeric(as.character(unlist(output[,3])))*100
+
+                            return(output)
+                           }
+
diff --git a/Function_scripts/genetic.informative.markers.R b/Function_scripts/genetic.informative.markers.R
new file mode 100644
index 0000000..5f646ef
--- /dev/null
+++ b/Function_scripts/genetic.informative.markers.R
@@ -0,0 +1,27 @@
+###Determine whether adjecent markers are informative
+#Mark Sterken, 26/7/2020
+
+###input
+#genotype.matrix (preferrably numeric matrix with genotypes; if not it will force it to be numberic)
+
+###output
+#a vector with whether a marker is informative (1) or not (0)
+
+###Description
+#performes side-by-side correlation analysis to determine if crossovers occured between markers. If so it will deem the markers on either side useful.
+
+###See also                                         
+
+genetic.informative.markers <- function(genotype.matrix){
+                                         ###force into numeric matrix for correlation analysis
+                                         genotype.matrix.numeric <- matrix(as.numeric(as.factor(unlist(genotype.matrix))),ncol=ncol(genotype.matrix))
+                                             genotype.matrix.numeric[is.na(genotype.matrix.numeric)] <- 0
+                                        
+                                         tmp <- round(as.numeric(diag(cor(t(genotype.matrix.numeric))[-1,])),digits=5)
+                                         ###each different marker is informative
+                                         tmp1 <- c(0,tmp)
+                                         tmp2 <- c(tmp,0)
+                                         
+                                         output <- tmp1 != 1 | tmp2 != 1
+                                         return(output)
+                                        }
diff --git a/Function_scripts/lm.1.R b/Function_scripts/lm.1.R
new file mode 100644
index 0000000..fc95b44
--- /dev/null
+++ b/Function_scripts/lm.1.R
@@ -0,0 +1,29 @@
+###lm function for NA handling
+
+###input
+#data.input (matrix)
+#variable (vector)
+
+###output
+#matrix with -log10 transformed p-values and effects
+
+###Description
+# Slow linear model for NA handling
+# ignores heterozygous strains in effect determination
+
+
+
+    lm.1 <- function(data.input,variable){
+                     retext <- function(x){return(c(first(x),last(x)))}
+                     
+                     out <- NULL
+                     for(i in 1:nrow(data.input)){
+                         out <- rbind(out,
+                                      cbind(LOD=-log10(cor.test(data.input[i,!is.na(variable)],variable[!is.na(variable)],na.action=na.exclude())$p.value),
+                                            effect=diff(retext(tapply(data.input[i,!is.na(variable)],variable[!is.na(variable)],mean,na.rm=T))))
+                                     )
+                     }
+
+                     return(out)
+                    }
+
diff --git a/Function_scripts/permutate.traits.R b/Function_scripts/permutate.traits.R
new file mode 100644
index 0000000..507b541
--- /dev/null
+++ b/Function_scripts/permutate.traits.R
@@ -0,0 +1,20 @@
+###permutates a trait matrix
+
+###input
+#trait.matrix (matrix strains in columsn, traits in rows)
+
+
+###output
+#by-row permutated matrix
+
+###Description
+
+
+    permutate.traits <- function(trait.matrix){
+                                 perm.list <- rep(c(1:nrow(trait.matrix)),times=ncol(trait.matrix)) + runif(length(trait.matrix),0.25,0.75)
+                                 perm.list <- order(perm.list)
+                                 perm.data <- matrix(as.numeric(trait.matrix)[perm.list],nrow=nrow(trait.matrix),ncol=ncol(trait.matrix),byrow=TRUE)
+                                 return(perm.data)
+    }
+
+    
\ No newline at end of file
diff --git a/Function_scripts/plot.genotype.strain.R b/Function_scripts/plot.genotype.strain.R
new file mode 100644
index 0000000..48f0547
--- /dev/null
+++ b/Function_scripts/plot.genotype.strain.R
@@ -0,0 +1,108 @@
+################################################################################
+###plot.genotype.strain(genotype.marker,genotype.strain1,genotype.strain2,chr.info,col.lines,both.chr)
+### visualizes the genotype of a single strain, can also visualize two strains side by side
+###
+###v1.1 fixed a bug where the area between the second last and last marker of the chromosome was not plotted
+###
+###   obligatory input
+###       strain.marker; marker names, and their corresponding chromosome numbers and basepairs
+###       strain.map1; genotype of the strain of interest, indicated using 1 (N2) and -1 (CB4856) values
+###   optional input
+###       chromosomes (character vector with chromosome names; standard is C. elegans)
+###       chromosome_size (numeric vector with chromosome sizes; standard is C. elegans)
+###       strain.map2; genotype of the second strain of interest, indicated using 1 (N2) and -1 (CB4856) values
+###       both.chr; set to TRUE if you input strain.map2; default is FALSE
+###       col.lines; graphic colours; default is c("blue", "grey", "orange")
+
+plot.genotype.strain <- function(strain.marker,strain.map1,strain.map2,chromosomes,chromosome_size,col.lines,both.chr){
+                                if(missing(strain.marker)){                         stop("insert marker info, name, chromosome, pos")}
+                                if(missing(strain.map1)){                           strain.map1 <- rep(0,nrow(strain.marker))}
+                                if(missing(strain.map2)){                           strain.map2 <- strain.map1}
+                                if(missing(chromosomes)){                           chromosomes <- c("I","II","III","IV","V","X")}
+                                if(missing(chromosome_size)){                       chromosome_size <- c(15072434,15279421,13783801,17493829,20924180,17718942)}
+                                if(length(chromosomes) != length(chromosome_size)){ stop("chromosome names and sizes should have the same length")}
+                                if(missing(col.lines)){                             col.lines <- c("#1F78B4","grey","#FF7F00")} ##default is -1 CB: blue, 1 N2: orange
+                                if(missing(both.chr)){                              both.chr <- FALSE}
+
+                                ###input chromosomes  
+                                chr.info <- data.frame(cbind(chromosome=chromosomes,
+                                                             size=chromosome_size,
+                                                             cumulative=c(0,cumsum(chromosome_size[-length(chromosome_size)]))),
+                                                       stringsAsFactors=FALSE)
+                                chr.info[,2] <- as.numeric(as.character(unlist(chr.info[,2]))) 
+                                chr.info[,3] <- as.numeric(as.character(unlist(chr.info[,3])))
+  
+                                strain.map1[is.na(strain.map1)] <- 0
+                                    strain.map1 <- col.lines[strain.map1+2]
+
+                                if(both.chr == FALSE){
+                                    plot(0,0,type="n",ylim=c(0,24),xlim=c(0.5,6.5),cex=2,axes=F,ylab="",xlab="",cex.lab=1.5)
+
+                                    ###loop over markers
+                                    for(j in 1:nrow(strain.marker)){
+                                        xpos <- which(chr.info[,1] == strain.marker[j,2])
+
+                                        tmp <- strain.marker[strain.marker[,2] == chr.info[xpos,1],]
+                                        m.on.c <- which(tmp[,3] == strain.marker[j,3])
+                                        if(m.on.c == 1){
+                                            lines(c(xpos,xpos),c(0,strain.marker[j,3]/1e6),col=strain.map1[j],lwd=21,lend=1)
+                                        }
+                                        if(m.on.c > 1 & m.on.c < nrow(tmp)){
+                                            lines(c(xpos,xpos),c(strain.marker[j-1,3]/1e6,strain.marker[j,3]/1e6),col=strain.map1[j],lwd=21,lend=1)
+                                        }
+                                        if(m.on.c == nrow(tmp)){
+                                            lines(c(xpos,xpos),c(strain.marker[j-1,3]/1e6,strain.marker[j,3]/1e6),col=strain.map1[j],lwd=21,lend=1)
+                                            lines(c(xpos,xpos),c(strain.marker[j,3]/1e6,chr.info[xpos,2]/1e6),col=strain.map1[j],lwd=21,lend=1)
+                                        }
+                                    }
+                                    text(1:6,23,chr.info[,1],font=2,cex=1.5)
+                                }
+
+                                if(both.chr == TRUE){
+                                strain.map2[is.na(strain.map2)] <- 0
+                                    strain.map2 <- col.lines[strain.map2+2]
+
+                                    plot(0,0,type="n",ylim=c(0,23),xlim=c(0.5,6.5),cex=2,axes=F,ylab="",xlab="",cex.lab=1.5)
+
+                                    ###loop over markers
+                                    for(j in 1:nrow(strain.marker)){
+                                        xpos <- which(chr.info[,1] == strain.marker[j,2])
+
+                                        tmp <- strain.marker[strain.marker[,2] == chr.info[xpos,1],]
+                                        m.on.c <- which(tmp[,3] == strain.marker[j,3])
+                                        if(m.on.c == 1){
+                                            lines(c(xpos-0.2,xpos-0.2),c(0,strain.marker[j,3]/1e6),col=strain.map1[j],lwd=9,lend=1)
+                                            lines(c(xpos+0.2,xpos+0.2),c(0,strain.marker[j,3]/1e6),col=strain.map2[j],lwd=9,lend=1)
+                                        }
+                                        if(m.on.c > 1 & m.on.c < nrow(tmp)){
+                                            lines(c(xpos-0.2,xpos-0.2),c(strain.marker[j-1,3]/1e6,strain.marker[j,3]/1e6),col=strain.map1[j],lwd=9,lend=1)
+                                            lines(c(xpos+0.2,xpos+0.2),c(strain.marker[j-1,3]/1e6,strain.marker[j,3]/1e6),col=strain.map2[j],lwd=9,lend=1)
+                                        }
+                                        if(m.on.c == nrow(tmp)){
+                                            lines(c(xpos-0.2,xpos-0.2),c(strain.marker[j-1,3]/1e6,strain.marker[j,3]/1e6),col=strain.map1[j],lwd=9,lend=1)
+                                            lines(c(xpos+0.2,xpos+0.2),c(strain.marker[j-1,3]/1e6,strain.marker[j,3]/1e6),col=strain.map2[j],lwd=9,lend=1)
+                                            lines(c(xpos-0.2,xpos-0.2),c(strain.marker[j,3]/1e6,chr.info[xpos,2]/1e6),col=strain.map1[j],lwd=9,lend=1)
+                                            lines(c(xpos+0.2,xpos+0.2),c(strain.marker[j,3]/1e6,chr.info[xpos,2]/1e6),col=strain.map2[j],lwd=9,lend=1)
+                                        }
+                                    }
+                                    text(1:6,23,chr.info[,1],font=2,cex=1.5)
+                                }
+                               }
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Function_scripts/prep.ggplot.QTL.profile.R b/Function_scripts/prep.ggplot.QTL.profile.R
new file mode 100644
index 0000000..4fb2b79
--- /dev/null
+++ b/Function_scripts/prep.ggplot.QTL.profile.R
@@ -0,0 +1,20 @@
+    prep.ggplot.QTL.profile <- function(QTL.peak.output,map1.output,trait){
+                                        output <- list()
+
+                                        output[[1]] <- QTL.peak.output[as.character(unlist(QTL.peak.output$trait))==trait,]
+                                            names(output)[1] <- "QTL_profile"
+                                        tmp <- output[[1]]$qtl_peak[!is.na(output[[1]]$qtl_peak)]
+
+                                        if(length(tmp)>0){
+                                            for(i in 1:length(tmp)){
+                                                output[[i+1]] <- cbind(trait=trait,
+                                                                       map1.output$Marker[tmp[i],],
+                                                                       strain=colnames(map1.output$Trait),
+                                                                       genotype=map1.output$Map[tmp[i],],
+                                                                       trait_value=map1.output$Trait[rownames(map1.output$Trait)==trait,],
+                                                                       R_squared=R.sq(x=map1.output$Map[tmp[i],],y=map1.output$Trait[rownames(map1.output$Trait)==trait,]))
+                                                names(output)[i+1] <- paste("QTLsplit_",as.character(unlist(map1.output$Marker[tmp[i],1])),sep="_")
+                                            }
+                                        }
+                                        return(output)
+                                       }
diff --git a/Function_scripts/prep.ggplot.eQTL.table.R b/Function_scripts/prep.ggplot.eQTL.table.R
new file mode 100644
index 0000000..e6833b9
--- /dev/null
+++ b/Function_scripts/prep.ggplot.eQTL.table.R
@@ -0,0 +1,36 @@
+    ###Makes a table with eQTL peaks for plotting
+    ###this function adds points to faithfully indicate the chromosome boundaries (standard is C. elegans))
+    prep.ggplot.eQTL.table <- function(eQTL.table.file,condition,chromosomes,chromosome_size){
+                                       if(missing(eQTL.table.file)){                        stop("need a eQTL.table.file")}
+                                       if(missing(condition)){                              condition <- "ct"}
+                                       if(missing(chromosomes)){                            chromosomes <- c("I","II","III","IV","V","X")}
+                                       if(missing(chromosome_size)){                        chromosome_size <- c(15072434,15279421,13783801,17493829,20924180,17718942)}
+                                       if(length(chromosomes) != length(chromosome_size)){  stop("chromosome names and sizes should have the same length")}
+
+                                       output <- dplyr::filter(eQTL.table.file, gene_chromosome %in% chromosomes) %>%
+                                                 dplyr::mutate(gene_chromosome = factor(gene_chromosome,levels=rev(chromosomes))) %>%
+                                                 dplyr::mutate(qtl_chromosome = factor(qtl_chromosome,levels=chromosomes),condition=condition) %>%
+                                                 dplyr::mutate(qtl_type = factor(qtl_type,levels=rev(sort(unique(qtl_type)))))
+
+                                       ###Chromosome sizes cheat
+                                       cheatpoints <- output[rep(1,times=length(chromosomes)*2),]
+                                       for(i in which(unlist(lapply(output,class)) == "factor")){
+                                           cheatpoints[,i] <- as.character(unlist(cheatpoints[,i]))
+                                       }
+                                       cheatpoints[1:nrow(cheatpoints),unlist(lapply(output,class)) != "character"] <- 0
+                                       cheatpoints[1:nrow(cheatpoints),unlist(lapply(output,class)) != "numeric"] <- ""
+                                       cheatpoints$qtl_chromosome <- as.character(rep(chromosomes,times=2))
+                                       cheatpoints$gene_chromosome <- as.character(rep(chromosomes,times=2))
+                                       cheatpoints$qtl_bp <- as.numeric(as.character(c(chromosome_size,rep(1,times=length(chromosomes)))))
+                                       cheatpoints$gene_bp <- as.numeric(as.character(c(chromosome_size,rep(1,times=length(chromosomes)))))
+                                       cheatpoints$trait <- paste(as.character(rep(chromosomes,times=2)),rep(c(1,2),each=length(chromosomes)))
+
+
+                                       ###combine
+                                       output <- rbind(output,cheatpoints) %>%
+                                                 dplyr::mutate(qtl_type = factor(qtl_type,levels = c("cis","trans","")))
+                                       
+                                       ###return
+                                       return(output)
+                                      }
+
diff --git a/Function_scripts/prep.ggplot.genetic.map.R b/Function_scripts/prep.ggplot.genetic.map.R
new file mode 100644
index 0000000..2b96c30
--- /dev/null
+++ b/Function_scripts/prep.ggplot.genetic.map.R
@@ -0,0 +1,52 @@
+###rewrites the genetic map to a dataframe plottable in ggplot
+
+
+###input
+#strain.map (of the investigated strains: markers in rows, genotypes in columns)
+#strain.marker (of the invesigated strains: markers in rows, with columns name, chromosome and position)
+
+###output
+#dataframe with columns: strain, chromosome, position_start, position_end, and genotype
+
+###Description
+
+prep.ggplot.genetic.map <- function(strain.map,strain.marker){
+                                    if(missing(strain.map)){                        stop("Requires genetic map with markers per row and strains per column")}
+                                    if(missing(strain.marker)){                     stop("Requires marker file with 3 columns: name, chromosome, position")}
+                                    if(nrow(strain.map) != nrow(strain.marker)){    stop("map does not match markers")}
+                                    if(ncol(strain.marker) != 3){                   stop("Requires marker file with 3 columns: name, chromosome, position")}
+
+                                    diffmap <- apply(strain.map,2,diff) != 0
+                                    chrmap <- diff(as.numeric(as.factor(strain.marker[,2]))) != 0
+
+                                    tmp <- as.data.frame(cbind(name=as.character(unlist(strain.marker[,1])),
+                                                           chromosome=as.character(unlist(strain.marker[,2])),
+                                                           position=as.numeric(as.character(unlist(strain.marker[,3]))),
+                                                           strain=rep(colnames(strain.map),each=nrow(strain.map)),
+                                                           genotype=as.numeric(strain.map),
+                                                           diff_left=as.numeric(rbind(diffmap,FALSE)),
+                                                           diff_right=as.numeric(rbind(FALSE,diffmap)),
+                                                           chr_left=as.numeric(c(chrmap,TRUE)),
+                                                           chr_right=as.numeric(c(TRUE,chrmap))))
+                                    for(i in c(3,5:9)){
+                                        tmp[,i] <- as.numeric(as.character(unlist(tmp[,i])))
+                                    }
+
+                                    tmp <- cbind(tmp[,1:5],diffs=apply(tmp[,6:9],1,sum))
+                                    tmp <- tmp[tmp[,6] != 0,]
+
+                                    tmp <- cbind(tmp[-nrow(tmp),-6],tmp[-1,-6])
+                                    tmp <- tmp[tmp[,2] == tmp[,7] & tmp[,4] == tmp[,9] & tmp[,5] == tmp[,10],]
+
+                                    output <- as.data.frame(cbind(strain=as.character(unlist(tmp[,4])),
+                                                                  chromosome=as.character(unlist(tmp[,2])),
+                                                                  position_start=as.numeric(as.character(unlist(tmp[,3]))),
+                                                                  position_end=as.numeric(as.character(unlist(tmp[,8]))),
+                                                                  genotype=as.numeric(as.character(unlist(tmp[,5])))))
+                                        output[,1] <- as.character(unlist(output[,1]))
+                                        output[,2] <- as.character(unlist(output[,2]))
+                                        output[,3] <- as.numeric(as.character(unlist(output[,3])))
+                                        output[,4] <- as.numeric(as.character(unlist(output[,4])))
+
+                                    return(output)
+                                   }
\ No newline at end of file
diff --git a/Function_scripts/transcriptomics.age.predictor.R b/Function_scripts/transcriptomics.age.predictor.R
new file mode 100644
index 0000000..fe9f03b
--- /dev/null
+++ b/Function_scripts/transcriptomics.age.predictor.R
@@ -0,0 +1,49 @@
+###predicts relative age of C. elegans transcriptomics sample
+# Snoek, L. Basten, et al. "A rapid and massive gene expression shift marking adolescent transition in C. elegans." Scientific reports 4 (2014): 3912.
+
+###input
+#df_input (dataframe with columns: SampleID,gene_WBID,log2_intensities)
+#predict_lm_fit (a fitted linear model object)
+
+###output
+#dataframe with: SampleID, hours_median,hours_sd,age_relative,age_relative_sd
+
+###Description
+# Please note that only the relative age is reliable for the age-range of 44-68 hours. If your samples are outside of that range, the prediction is
+# probably not accurate. The predicated age can only be interpreted relative; only if you have samples for calibration the relative age can be converted
+
+transcriptomics.age.predictor <- function(df_input,predict_lm_fit=age_predict_lm_fit){
+                                          if(missing(df_input)){                stop("give input dataframe, columns: SampleID,gene_WBID,log2_intensities")}
+                                          if(missing(predict_lm_fit)){          stop("requires fitted model timepoint~expression:gene")}
+                                            
+                
+                                          genes <- unlist(strsplit(names(predict_lm_fit$coefficients),split=":"))
+                                              genes <- genes[grepl("WBID",genes)]
+                                              genes <- gsub("gene_WBID","",genes)
+                                          
+                                          out <- NULL
+                                          for(i in unique(df_input$SampleID)){
+                                              sample.nu <- dplyr::filter(df_input,SampleID==i) %>%
+                                                           dplyr::filter(gene_WBID %in% genes) %>%
+                                                           dplyr::group_by(gene_WBID) %>%
+                                                           dplyr::summarise(log2_intensities=median(log2_intensities)) %>%
+                                                           dplyr::mutate(expression=log2(log2_intensities/mean(log2_intensities))) %>%
+                                                           dplyr::select(gene_WBID,expression) %>%
+                                                           dplyr::mutate(gene_WBID=as.character(unlist(gene_WBID)))
+                                            
+                                              out <- rbind(out,
+                                                           cbind(SampleID=i,hours_median=median(predict(predict_lm_fit,newdata=sample.nu)),hours_sd=sd(predict(predict_lm_fit,newdata=sample.nu)))
+                                                     )
+                                          }    
+                                          
+                                          out <- data.frame(out)
+                                              out[,1] <- as.character(unlist(out[,1]))
+                                              out[,2] <- as.numeric(as.character(unlist(out[,2])))          
+                                              out[,3] <- as.numeric(as.character(unlist(out[,3]))) 
+                                          
+                                          out <- mutate(out,age_relative=(hours_median-mean(hours_median))/sd(hours_median),age_relative_sd=hours_sd/sd(hours_median))
+                                          
+                                          warning("prediction is accurate for RELATIVE age between 44 - 68 hours of age") 
+                                          return(out)
+                                         }
+                
diff --git a/Function_scripts/transcriptomics.df.to.list.R b/Function_scripts/transcriptomics.df.to.list.R
new file mode 100644
index 0000000..ab1fdf9
--- /dev/null
+++ b/Function_scripts/transcriptomics.df.to.list.R
@@ -0,0 +1,48 @@
+################################################################################################################################################################
+### dataframe.to.list(dataframe,variable.test,transformation.test) v1.3
+###   returns a list with a data matrix of the selected data column for use in the lm scripts and a numeric vector of the variables
+###   obligatory input
+###     dataframe, output from list.to.dataframe
+###     variable.test, the names of the columns for which the variables should be generated, simply give the names in a character vector
+###     transformation.test, the name of the column that should be turned into a data matrix
+### v1.2 Modified the function so multiple variables can be extracted at once
+### v1.3 Fixed naming the output
+
+transcriptomics.df.to.list <- function(dataframe,variable.test,transformation.test){
+                                       if(missing(dataframe)){                            stop("require dataframe")}
+                                       if(missing(variable.test)){                        stop("require column names of the variables that should be tested")}
+                                       if(missing(transformation.test)){                  stop("require column name of the data that should be tested")}
+          
+                                       ###Generate output file
+                                       output <- list()
+          
+                                       ###Generate the data file
+                                       col.dat <- colnames(dataframe) == transformation.test
+          
+          
+                                       output[[1]] <- matrix(dataframe[,col.dat],nrow=length(unique(dataframe$SpotID)))
+                                                      colnames(output[[1]]) <- rep("",times=ncol(output[[1]]))
+                                                      rownames(output[[1]]) <- unique(dataframe$SpotID)
+          
+                                       ###Generate the variable vectors for lm
+                                       col.var <- colnames(dataframe) %in% variable.test
+          
+                                       n <- 2
+                                       for(i in which(col.var)){
+                                           variables <- as.numeric(as.factor(dataframe[seq(1,nrow(dataframe),by=length(unique(dataframe$SpotID))),i]))
+                                               variables[variables ==1] <- -1
+                                               variables[variables > 1] <- variables[variables > 1] -1
+                                               names(variables) <- dataframe[seq(1,nrow(dataframe),by=length(unique(dataframe$SpotID))),i]
+          
+                                           output[[n]] <- variables
+                                           if(n==2){colnames(output[[1]]) <- names(variables)}
+                                           if(n> 2){colnames(output[[1]]) <- paste(colnames(output[[1]]),names(variables),sep=":")}
+                                           n <- n+1
+                                       }
+          
+                                       ###name the output
+                                       names(output) <- c(transformation.test,colnames(dataframe)[col.var])
+          
+                                       ###return
+                                       return(output)
+                                      }
diff --git a/Function_scripts/transcriptomics.lm.R b/Function_scripts/transcriptomics.lm.R
new file mode 100644
index 0000000..db46286
--- /dev/null
+++ b/Function_scripts/transcriptomics.lm.R
@@ -0,0 +1,88 @@
+###linear model for transcriptomics
+
+###input
+#lm_data (matrix with transcriptome data)
+#lm_factors (dataframe with factors over which data should be explained)
+#lm_model (either addtive, interaction, or custom)
+#lm_model_custom (in case custom is selected as model, provide the model)
+
+###output
+#list with input data, factors, and outcome of the model
+
+###Description
+# custom models can be written as lm_factors[,1] + lm_factors[,2] + lm_factors[,1] : lm_factors[,3]
+
+
+transcriptomics.lm <- function(lm_data,lm_factors,lm_model,lm_model_custom){
+                                if(missing(lm_data)){                                       stop("provide data for the linear model")}
+                                if(missing(lm_factors)){                                    stop("provide dataframe with the explanatory factors")}
+                                if(missing(lm_model)){                                      lm_model <- "additive"}
+                                if(!lm_model %in% c("additive","interaction","custom")){    stop("the supported model types are: additive, interaction, and custom")}
+                                if(lm_model == "custom"){
+                                    if(missing(lm_model_custom)){                           stop("provide a model in the form of: lm_factors[,1] + lm_factors[,2] + lm_factors[,1] : lm_factors[,3]")}
+                                }
+
+                                if(length(dim(lm_factors)) == 0){lm_factors <- matrix(lm_factors,ncol=1); colnames(lm_factors) <- "x"}
+
+                                ###constructs the names of the interaction model
+                                interaction_names <- function(output){
+                                                              name_matrix <- matrix(colnames(output[[2]]),ncol=ncol(output[[2]]),nrow=ncol(output[[2]]))
+                                                              tmp <- list()
+                                                              lm_names <- NULL
+
+                                                              for(j in 1:ncol(output[[2]])){
+                                                                  if(j == 1){
+                                                                      tmp[[j]] <- name_matrix
+                                                                  } else {
+                                                                      tmp[[j]] <- name_matrix[-c(1:(j-1)),-c(1:(j-1))]
+                                                                  }
+                                                                  if(j < ncol(output[[2]])){
+                                                                      for(i in 2:ncol(tmp[[j]])){
+                                                                          tmp[[j]][,i] <- paste(paste(tmp[[j]][1:(i-1),i],collapse=":"),tmp[[j]][,i],sep=":")
+                                                                      }
+                                                                  }
+                                                                  tmp[[j]][upper.tri(tmp[[j]])] <- NA
+                                                              }
+                                                              for(j in 1:ncol(output[[2]])){
+                                                                  if(j == 1){
+                                                                      lm_names <- c(lm_names,tmp[[j]][,j])
+                                                                  } else {
+                                                                      for(i in 1:ncol(output[[2]])){
+                                                                          if(i <= (ncol(output[[2]])+1-j)){
+                                                                              lm_names <- c(lm_names,tmp[[i]][,j])
+                                                                          }
+                                                                      }
+                                                                  }
+                                                              }
+                                                              lm_names <- lm_names[!is.na(lm_names)]
+                                                              return(lm_names)
+                                                             }
+
+                                output <- list()
+                                    output[[1]] <- lm_data; if(length(rownames(output[[1]])) == 0){rownames(output[[1]]) <- 1:nrow(output[[1]])}
+                                    output[[2]] <- lm_factors; if(length(colnames(output[[2]])) == 0){colnames(output[[2]]) <- 1:ncol(output[[2]])}
+
+
+                                if(lm_model == "additive"){
+                                    lm_model_custom <- paste("lm(t(output[[1]]) ~ ",paste(paste("output[[2]][,",1:ncol(output[[2]]),"]",sep=""),collapse=" + "),")",sep="")
+                                    tmp <- fast.summ.transcriptomics(eval(parse(text=lm_model_custom)))
+                                        colnames(tmp) <- c(paste("significance_",colnames(output[[2]]),sep=""),paste("effect_",colnames(output[[2]]),sep=""))
+                                }
+                                if(lm_model == "interaction"){
+                                    lm_model_custom <- paste("lm(t(output[[1]]) ~ ",paste(paste("output[[2]][,",1:ncol(output[[2]]),"]",sep=""),collapse=" * "),")",sep="")
+                                    tmp <- fast.summ.transcriptomics(eval(parse(text=lm_model_custom)))
+                                        colnames(tmp) <- c(paste("significance_",interaction_names(output),sep=""),paste("effect_",interaction_names(output),sep=""))
+                                }
+                                if(lm_model == "custom"){
+                                    lm_model_custom <- paste("lm(t(output[[1]]) ~ ",lm_model_custom,")",sep="")
+                                    tmp <- fast.summ.transcriptomics(eval(parse(text=lm_model_custom)))
+                                }
+
+
+
+                                output[[3]] <- tmp
+
+                                names(output) <- c("data","factors","model")
+
+                                return(output)
+                               }
diff --git a/Function_scripts/transgression.calc.R b/Function_scripts/transgression.calc.R
new file mode 100644
index 0000000..8cf87ec
--- /dev/null
+++ b/Function_scripts/transgression.calc.R
@@ -0,0 +1,73 @@
+###function for calculating transgression in eQTL sets
+#The method is based on Brem, 2005 and assumes single measurements per strain per trait, except for the parental strains.
+
+###input
+#trait.value: a vector with trait values
+#strain: a vector with strain names matching the trait values
+#trait: a vector with trait names (e.g you can enter multiple traits this way)
+#parental.strain: names of parental strains (only for Keurentjes; otherwise parental strains should be removed from the analysis)
+#sigmas: how many sigmas should RILs be beyond the parents to be called significant? (standard is 2)
+#n.perm: number of permutations to be done (standard 1000)
+
+###output
+#a dataframe with traits per row and columns with the number of transgressive strains for the actual dataset and for FDR0.05 and 0.01 threshold
+
+
+
+    ###Transgression, calculated and permutated as in Brem, 2005
+    transgression.calc <- function(trait.value,strain,trait,parental.strain,sigmas,n.perm){
+                                  if(missing(trait.value)){                                 stop("provide trait values")}
+                                  if(missing(strain)){                                      stop("provide strain names")}
+                                  if(missing(trait)){                                       trait <- rep("A",length(trait_value))}
+                                  if(missing(parental.strain)){                             stop("provide names of two parental strains")}
+                                  if(missing(sigmas)){                                      sigmas <- 2}
+                                  if(missing(n.perm)){                                      n.perm <- 1000}
+
+                                  ###Convenience function for transgression
+                                  transg.sapply <- function(input,parental.strain,sigmas){
+                                                            selc.parental1 <- tolower(input$strain) %in% tolower(parental.strain[1])
+                                                            selc.parental2 <- tolower(input$strain) %in% tolower(parental.strain[2])
+
+                                                            VAR.PL <- tapply(input$trait.value[(selc.parental1|selc.parental2)],input$strain[(selc.parental1|selc.parental2)],var)
+                                                            N.PL <- tapply(input$trait.value[(selc.parental1|selc.parental2)],input$strain[(selc.parental1|selc.parental2)],length)
+                                                            SD.PL <- (sum(VAR.PL*(N.PL-1))/sum((N.PL-1)))^2
+
+                                                            MEAN.PL1 <- mean(input$trait.value[selc.parental1],na.rm=T)
+                                                            MEAN.PL2 <- mean(input$trait.value[selc.parental2],na.rm=T)
+
+                                                            tmp <- tapply(input$trait.value[!(selc.parental1|selc.parental2)],input$strain[!(selc.parental1|selc.parental2)],mean,na.rm=T)
+
+                                                            transg <- sum(tmp < (min(c(MEAN.PL1,MEAN.PL2))-sigmas*SD.PL) |
+                                                                          tmp > (max(c(MEAN.PL1,MEAN.PL2))+sigmas*SD.PL))
+
+                                                            return(transg)
+                                                           }
+
+                                  ###Prep the data
+                                  input <- data.frame(cbind(trait.value=trait.value,strain=strain))
+                                      input$trait.value <- as.numeric(as.character(unlist(input$trait.value)))
+                                      input$strain <- as.character(unlist(input$strain))
+
+                                  input <- split(input,trait)
+
+                                  transg <- sapply(input,transg.sapply,parental.strain = parental.strain, sigmas = sigmas)
+
+                                  perm <- NULL
+                                  for(i in 1:n.perm){
+                                      input.perm <- lapply(input,function(x){x[,1] <- x[order(runif(nrow(x))),1];return(x)})
+                                      transg.perm <- sapply(input.perm,transg.sapply,parental.strain = parental.strain, sigmas = sigmas)
+                                      perm <- rbind(perm,transg.perm)
+                                  }
+                                  FDR0.1 <- apply(perm,2,function(x){sort(x)[ceiling(n.perm*0.90)]})
+                                  FDR0.05 <- apply(perm,2,function(x){sort(x)[ceiling(n.perm*0.95)]})
+                                  FDR0.01 <- apply(perm,2,function(x){sort(x)[ceiling(n.perm*0.99)]})
+
+                                  output <- as.data.frame(cbind(trait=names(input),n_strains_transgression=transg,FDR0.1=FDR0.1,FDR0.05=FDR0.05,FDR0.01=FDR0.01))
+                                      output[,1] <- as.character(unlist(output[,1]))
+                                      output[,2] <- as.numeric(as.character(unlist(output[,2])))
+                                      output[,3] <- as.numeric(as.character(unlist(output[,3])))
+                                      output[,4] <- as.numeric(as.character(unlist(output[,4])))
+                                      output[,5] <- as.numeric(as.character(unlist(output[,5])))
+                                  return(output)
+                                 }
+
diff --git a/mainscript_aS_eQTL.R b/mainscript_aS_eQTL.R
index 9c61bda..5a0743d 100644
--- a/mainscript_aS_eQTL.R
+++ b/mainscript_aS_eQTL.R
@@ -20,8 +20,8 @@
             }
         setwd(workwd)
 
-    source(paste(git_dir,"/NEMA_functions/Loader.R",sep=""))
-    NEMA.get.functions("mainscript_aS_eQTL.R")
+    #source(paste(git_dir,"/NEMA_functions/Loader.R",sep=""))
+    #NEMA.get.functions("mainscript_aS_eQTL.R",NEMA_functions_dir = "C:/Users/sterk009/OneDrive - Wageningen University & Research/Shared_data_Mark/Projects_R_Zone/Git/NEMA_functions")
 
 
 # Packages ----------------------------------------------------------------
-- 
GitLab