Skip to content
Snippets Groups Projects
combined-stage-eqtl-analysis.R 15.5 KiB
Newer Older
Hartanto, Margi's avatar
Hartanto, Margi committed
########################################
##### combined-stage eQTL analysis #####
########################################

#### prepare the script working environment ####
  remove(list = ls())
  gc()
  set.seed(1000)  
  
  # Set working directory ####
  work.dir <- "C:/Users/harta005/Projects/seed-germination-qtl"
  setwd(work.dir)

  # dependencies ####
  library(dplyr)
  library(ggplot2)
  library(tidyr)
  library(doParallel)
  library(VennDiagram)
  library(UpSetR)
  library(forcats)
  library(grid)
  
  library(heritability)
  library(topGO)
  library(org.At.tair.db)
  # unused libraries ####
  # library(Mfuzz)
  # library(Biobase)
  # library(gplots)
  # library(RColorBrewer)
  # library(limma)
  # library(factoextra)
  # library(stats)
  # library(amap)
  # 
  # 
  # 
  # library(gridExtra)
  # library(RCy3)
  # library(corrplot)
  # library(reshape2)
  # library(Hmisc)
  # library(igraph)
  # library(threejs)
  # library(MASS)
  # 

  # load required function ####
  setwd('functions/')
  for(i in 1:length(dir())){
    source(dir()[i])
  } # read function from Mark
  setwd(work.dir)
  
  overlap <- function(table, stage) {
    for (i in 1:length(stage)) {
      if (nrow(table) != 0) {
        table <- subset(table, table[stage[i]] == T)
      } else {
        break
      }
    }
    nrow(table)
  } # function to determine the overlapping qtl between 2 stages
  
  # load the data ####
  seed.stage <- c('pd', 'ar', 'im', 'rp')
  
  
  # presentation
  presentation <- theme(axis.text.x = element_text(size=10, face="bold", color="black"),
                        axis.text.y = element_text(size=10, face="bold", color="black"),
                        axis.title.x = element_text(size=12, face="bold", color="black"),
                        axis.title.y = element_text(size=12, face="bold", color="black"),
                        strip.text.x = element_text(size=12, face="bold", color="black"),
                        strip.text.y = element_text(size=12, face="bold", color="black"),
                        strip.text = element_text(size =12, face="bold", color="black"),
                        #plot.title = element_text(size=15, face="bold"),
                        panel.background = element_rect(fill = "white",color="black"),
                        panel.grid.major = element_line(colour = "grey80"),
                        panel.grid.minor = element_blank())
  
#### combined-stage eQTL mapping - reduced model ####
    
    stage <- 'rm'
    qtl.data <- QTL.data.prep(trait.matrix = trait.matrix, 
                              strain.trait = colnames(trait.matrix), 
                              strain.map = genetic.map, 
                              strain.marker = marker)
    
    cluster <- makeCluster(n.cores, type = "PSOCK")
    registerDoParallel(cluster)
    output <- foreach(i = 1:nrow(trait.matrix), .combine = 'cbind') %dopar% {
      map.all.marker(trait = trait.matrix[i, ], markers = genetic.map)
    }
    stopCluster(cluster)
    
    pval.out <- t(output[, which(colnames(output) == 'LOD')])
    eff.out <- t(output[, which(colnames(output) == 'Eff')])
    
    colnames(pval.out) <- rownames(marker); rownames(pval.out) <- rownames(trait.matrix)
    colnames(eff.out) <- rownames(marker); rownames(eff.out) <- rownames(trait.matrix)
    
    qtl.profile <- NULL; qtl.profile <- as.list(qtl.profile)
    qtl.profile[[1]] <- round(pval.out,digits=2)
    qtl.profile[[2]] <- round(eff.out,digits=3)
    qtl.profile[[3]] <- trait.matrix
    qtl.profile[[4]] <- genetic.map
    qtl.profile[[5]] <- marker
    names(qtl.profile) <- c("LOD","Effect","Trait","Map","Marker")
    
    write.EleQTL(map1.output = qtl.profile, filename = paste0("qtl-tables/table_single-stage-eqtl_", 'rm'))
    saveRDS(object = qtl.profile, 
            file = paste0("qtl-profiles/profile_combined-stage-eqtl_", stage, ".rds")) 
    
    # eQTL peak finder - combined-stage rm  ####
    
    stage <- 'rm'
    
    # peak finder
    qtl.profile <- readRDS(paste0('qtl-profiles/profile_single-stage-eqtl_', stage, '.rds'))
    qtl.peak <- mapping.to.list(map1.output = qtl.profile) %>%
      peak.finder(threshold = 4.3)
    qtl.peak <- na.omit(qtl.peak)
    saveRDS(object = qtl.peak,
            file = paste0("qtl-peaks/peak_combined-stage-eqtl_", stage, ".rds"))
    
    
    # eQTL table- combined-stage rm ####
    stage <- 'rm'
    qtl.profile <- readRDS(paste0('qtl-profiles/profile_combined-stage-eqtl_', stage, '.rds'))
    qtl.peak <- readRDS(paste0('qtl-peaks/peak_combined-stage-eqtl_', stage, '.rds'))
    eqtl.table <- eQTL.table(peak.list.file = qtl.peak, trait.annotation = gene.info) %>%
      eQTL.table.addR2(QTL.prep.file = qtl.data)
    eqtl.table$qtl_chromosome <- as.factor(eqtl.table$qtl_chromosome)
    eqtl.table$gene_chromosome <- as.factor(eqtl.table$gene_chromosome)
    
    # add heritability - combined-stage rm #####
    
    h2.result <- as.list(NULL)
    n.perm <- 100
    qtl.genes <- eqtl.table$trait.matrix
    
    map <- genetic.map
    map <- apply(map, 2, as.numeric) # make sure the alleles are treated as numeric
    trait <- trait.matrix
    
    map2 <- map
    #map2[map2 == 0] <- 0.5
    map2[map2 == -1] <- round(0, 0)
    #map2[is.na(map2)] <- 0.5
    kinship.matrix <- emma.kinship(map2)
    colnames(kinship.matrix) <- colnames(map2); rownames(kinship.matrix) <- colnames(map2)
    
    cluster <- makeCluster(n.cores, type = 'PSOCK')
    clusterExport(cl = cluster, c('trait', 'map2', 'marker_h2', 'kinship.matrix', 'h2.REML'))
    h2 <- t(parApply(cl = cluster, X = trait, MARGIN = 1, FUN = h2.REML, strain.names = colnames(map2), kinship.matrix = kinship.matrix, Vg.factor = 1))
    stopCluster(cluster)
    
    h2 <- cbind.data.frame(trait = rownames(h2), h2)
    eqtl.table <- merge(x = eqtl.table, y = h2[, 1:2], by = 'trait', all.x = T)
    eqtl.table <- rename(eqtl.table, h2 = h2_REML)
    
    # trans bands identification - combined-stage rm  #####
    window.nu <- 2e6
    maxsize <- 100e6
    chr.num <- 5
    
    transband.id <- mutate(eqtl.table, interval = findInterval(qtl_bp, seq(1, maxsize, by = window.nu))) %>%
      group_by(qtl_chromosome, interval, qtl_type) %>%
      summarise(n.ct = length(unique(trait))) %>%
      data.frame() %>%
      group_by(qtl_type) %>%
      mutate(exp.ct = mean(as.numeric(unlist(n.ct)))) %>%
      data.frame() %>%
      mutate(transband_significance = ppois(n.ct, lambda = exp.ct, lower.tail = F)) %>%
      filter(transband_significance < 0.0001, qtl_type == "trans")
    
    transband.id$transband_id <- with(transband.id, paste0("ch", qtl_chromosome, ":", 
                                                           (interval - 1) * 2, "-", 
                                                           interval * 2, "Mb"))
    transband.id$stage <- stage
    
    saveRDS(object = transband.id,
            file = paste0("trans-bands/trans.bands_", stage, ".rds"))
    
    # for rm
    if( stage == 'rm' ) {
      eqtl.table <- mutate(eqtl.table,
                           trans_band = ifelse(qtl_type == "trans" &
                                                 qtl_chromosome == 5 &
                                                 qtl_bp > 6e6 &
                                                 qtl_bp <= 8e6, "ch5:6-8Mb",
                                               ifelse(qtl_type == "trans" &
                                                        qtl_chromosome == 1 &
                                                        qtl_bp > 26e6 &
                                                        qtl_bp <= 28e6, "ch1:26-28Mb",
                                                      ifelse(qtl_type == "trans" &
                                                               qtl_chromosome == 2 &
                                                               qtl_bp > 10e6 &
                                                               qtl_bp <= 14e6, "ch2:10-14Mb",
                                                             ifelse(qtl_type == "trans" &
                                                                      qtl_chromosome == 4 &
                                                                      qtl_bp > 0e6 &
                                                                      qtl_bp <= 2e6, "ch4:0-2Mb",
                                                                    ifelse(qtl_type == "trans" &
                                                                             qtl_chromosome == 5 &
                                                                             qtl_bp > 14e6 &
                                                                             qtl_bp <= 16e6, "ch5:14-16Mb","none"))))))
    }
    
    print("finish")
  
    write.csv(x = eqtl.table,
              file = paste0("qtl-tables/table_combined-stage-eqtl_", stage, ".csv"), 
              row.names = T)
    saveRDS(object = eqtl.table,
            file = paste0("qtl-tables/table_combined-stage-eqtl_", stage, ".rds"))
    table(eqtl.table$qtl_type, eqtl.table$trans_band != "none")
    
    # GO for trans bands - combined-stage rm ####
    
    ontology <- 'BP'
    
    x <- org.At.tairCHR
    all.genes <- as.list(rownames(trait.matrix))
    transband.go <- as.data.frame(matrix(data = NA, nrow = 0, ncol = 9))
    colnames(transband.go) <- c('GO.ID', 'Term', 'Annotated', 'Significant', 'Expected', 'Fisher', 
                                'FDR', 'stage', 'transband')
    
      eqtl.table <- readRDS(paste0('qtl-tables/table_combined-stage-eqtl_', stage, '.rds'))
      transband.id <- unique(eqtl.table$trans_band)
      transband.id <- transband.id[!transband.id %in% 'none']
      transband.go.perstage <- as.data.frame(matrix(data = NA, nrow = 0, ncol = 9))
      colnames(transband.go.perstage) <- c('GO.ID', 'Term', 'Annotated', 'Significant', 'Expected', 'Fisher', 
                                           'FDR', 'stage', 'transband')
      
      for (i in 1:length(transband.id)) {
        gene.set <- eqtl.table[which(eqtl.table$trans_band == transband.id[i]), 'trait']
        gene.set <- factor(as.integer(all.genes %in% gene.set))
        names(gene.set) <- all.genes
        GOdata <- new("topGOdata",
                      description = "GOE for genes in regulated by trans bands", ontology = ontology,
                      allGenes = gene.set, 
                      annot = annFUN.org,mapping= "org.At.tair.db")
        resultFisher <- runTest(GOdata, algorithm = 'weight', statistic = "fisher")
        result.df <- GenTable(GOdata, Fisher = resultFisher,
                              orderBy = "Fisher", ranksOf = "Fisher", topNodes = length(resultFisher@score))
        result.df$stage <- stage
        result.df$transband <- transband.id[i]
        result.df$Fisher <- as.numeric(result.df$Fisher)
        result.df$FDR <- p.adjust(p = result.df$Fisher, method = 'fdr')
        result.df <- result.df[order(result.df$FDR), ]
        result.df <- dplyr::filter(result.df, Fisher <= 0.01)
        transband.go <- rbind(transband.go, result.df)
      }

    write.csv(transband.go, paste0('files/trans-bands-rm-go', ontology, '.csv'))
    
    # prepare eqtl table and plotting - combined-stage rm ####
    
    eqtl.table <- eqtl.table %>%
      mutate(qtl_type = ifelse(qtl_type == 'cis', 'local', 'distant'))
  
    # cis-trans plot - combined-stage rm ####
    
    eqtl.plot <- ggplot(eqtl.table, aes(x = qtl_bp, y = gene_bp)) + 
      geom_segment(aes(x = qtl_bp_left, y = gene_bp, xend = qtl_bp_right, yend = gene_bp),
                   alpha = 0.25, colour = "grey") +
      geom_point(aes(colour = ordered(qtl_type, levels = c('local', 'distant'))), alpha = .75) +
      facet_grid(fct_rev(gene_chromosome) ~ qtl_chromosome, space = "free", scales = "free") +
      presentation +
      scale_colour_manual('eQTL type', values = c('black', '#377eb8')) +
      labs(x = "eQTL peak position (Mb)", y = "Gene position (Mb)") +
      scale_x_continuous(breaks=c(5, 10, 15, 20, 25, 30, 35, 40)*10^6,labels=c(5, 10, 15, 20, 25, 30, 35, 40)) +
      scale_y_continuous(breaks=c(5, 10, 15, 20, 25, 30, 35, 40)*10^6,labels=c(5, 10, 15, 20, 25, 30, 35, 40))
    
    pdf(file = paste0("figures/cis-trans-plot-rm.pdf"), width = 10, height = 8)
    eqtl.plot
    dev.off()
    
    # histogram - combined-stage rm ####
    
    eqtl.hist <- ggplot(eqtl.table %>% filter(qtl_type != 'local'), 
                        aes(x=qtl_bp)) + # 750 x 400
      geom_histogram(binwidth = 2000000, alpha = 0.8, fill = '#377EB8') +
      facet_grid(. ~ qtl_chromosome, space = "free",scales="free") +
      presentation +
      scale_fill_manual(values = 'blue') +
      theme(legend.position = "right", legend.text=element_text(size=10)) +
      labs(x="eQTL peak position (Mb)",y="eQTL counts") +
      ylim(c(0, 80)) +
      geom_hline(aes(yintercept = 11.72), 
                 linetype = 'dashed', 
                 color = 'red', 
                 size = .1) +
      scale_x_continuous(breaks=c(5, 10, 15, 20, 25, 30, 35, 40)*10^6,labels=c(5, 10, 15, 20, 25, 30, 35, 40))
    
    pdf(file = "figures/trans-eqtl-histogram-rm.pdf", width = 10, height = 5)
    eqtl.hist
    dev.off()
    
    # R2 vs heritability - combined-stage rm ####
    ggplot(eqtl.table, aes(x = qtl_R2_sm,
                               fill = factor(qtl_type, levels = c('local', 'distant')),
                               color = factor(qtl_type, levels = c('local', 'distant')))) +
      geom_histogram(aes(y = ..density..), binwidth = 0.01, position = 'identity', alpha = 0) +
      geom_density(alpha = 0.5, size = 0) +
      geom_vline(aes(xintercept = median(qtl_R2_sm),
                     color = factor(qtl_type, levels = c('local', 'distant'))),
                 linetype = 'dashed', size = .5) +
      presentation +
      scale_color_brewer(palette = 'Set1') +
      scale_fill_brewer(palette = 'Set1') +
      xlab('explained phenotypic variance (R2)') +
      ylab('count of eQTL') +
      labs(fill = 'eQTL type', color = 'eQTL type') +
      #ylim(0, 1) + 
      xlim(0, 1) +
      theme(strip.text.x = element_text(size = 8))
    
    ggplot(eqtl.table, aes(x = h2, y = qtl_R2_sm,
                               fill = factor(qtl_type, levels = c('local', 'distant')),
                               color = factor(qtl_type, levels = c('local', 'distant')))) +
      geom_point(size = 1, alpha = 0.5) +
      geom_vline(data = group_by(eqtl.table, qtl_type) %>%
                   summarise(med = median(h2, na.rm = T)), 
                 aes(xintercept = med, color = factor(qtl_type, levels = c('local', 'distant'))),
                 linetype = 'dashed', size = .5) +
      geom_hline(data = group_by(eqtl.table, qtl_type) %>%
                   summarise(med = median(qtl_R2_sm, na.rm = T)), 
                 aes(yintercept = med, color = factor(qtl_type, levels = c('local', 'distant'))),
                 linetype = 'dashed', size = .5) +
      geom_smooth(model = lm) +
      geom_abline(aes(slope = 1, intercept = 0), size = .75, linetype = 'dashed') +
      presentation +
      scale_color_brewer(palette = 'Set1') +
      scale_fill_brewer(palette = 'Set1') +
      xlab('narrow-sense heritabllity (h2)') +
      ylab('explained phenotypic variance (R2)') +
      labs(fill = 'eQTL type', color = 'eQTL type') +
      ylim(0, 1) + 
      xlim(0, 1) + 
      theme(strip.text.x = element_text(size = 8))