Skip to content
Snippets Groups Projects
3-eqtl-visualization.R 20.4 KiB
Newer Older
Hartanto, Margi's avatar
Hartanto, Margi committed
##################################################
##### visualisation of eQTL analysis results #####
##################################################

#### prepare the script working environment ####
  remove(list = ls())
  gc()
  set.seed(1000)  
  
  # Set working directory ####
  work.dir <- "C:/Users/harta005/Projects/seed-germination-qtl"
  setwd(work.dir)

  # dependencies ####
  library(dplyr)
  library(ggplot2)
  library(tidyr)
  library(doParallel)
  library(VennDiagram)
  library(UpSetR)
  library(forcats)
  library(grid)
  
  library(heritability)
  library(topGO)
  library(org.At.tair.db)
  # unused libraries ####
  # library(Mfuzz)
  # library(Biobase)
  # library(gplots)
  # library(RColorBrewer)
  # library(limma)
  # library(factoextra)
  # library(stats)
  # library(amap)
  # 
  # 
  # 
  # library(gridExtra)
  # library(RCy3)
  # library(corrplot)
  # library(reshape2)
  # library(Hmisc)
  # library(igraph)
  # library(threejs)
  # library(MASS)
  # 

  
  # load required function ####
  setwd('functions/')
  for(i in 1:length(dir())){
    source(dir()[i])
  } # read function from Mark
  setwd(work.dir)
  
  overlap <- function(table, stage) {
    for (i in 1:length(stage)) {
      if (nrow(table) != 0) {
        table <- subset(table, table[stage[i]] == T)
      } else {
        break
      }
    }
    nrow(table)
  } # function to determine the overlapping qtl between 2 stages
  
  # load the data ####
  seed.stage <- c('pd', 'ar', 'im', 'rp')
  
  
  # presentation
  presentation <- theme(axis.text.x = element_text(size=10, face="bold", color="black"),
                        axis.text.y = element_text(size=10, face="bold", color="black"),
                        axis.title.x = element_text(size=12, face="bold", color="black"),
                        axis.title.y = element_text(size=12, face="bold", color="black"),
                        strip.text.x = element_text(size=12, face="bold", color="black"),
                        strip.text.y = element_text(size=12, face="bold", color="black"),
                        strip.text = element_text(size =12, face="bold", color="black"),
                        #plot.title = element_text(size=15, face="bold"),
                        panel.background = element_rect(fill = "white",color="black"),
                        panel.grid.major = element_line(colour = "grey80"),
                        panel.grid.minor = element_blank())
  
# combine the eqtl table and plotting - single stage ####
    
    all.eqtl.table <- data.frame(matrix(ncol = 20, nrow = 0))
    #colnames(all.eqtl.table) <- colnames(eqtl.table)

    for (stage in seed.stage) {
      print(stage)
      eqtl.table <- readRDS(paste0('qtl-tables/table_single-stage-eqtl_', stage, '.rds')) %>%
        mutate(stage = stage)
      all.eqtl.table <- rbind(all.eqtl.table, eqtl.table)
    }
    
    all.eqtl.table <- all.eqtl.table %>%
      mutate(qtl_type = ifelse(qtl_type == 'cis', 'local', 'distant'))
    
    saveRDS(all.eqtl.table, 'qtl-tables/table_single-stage-eqtl_all.rds')
  
# comparing LOD, effect, and R2 of shared local and distant eQTL - single stage ####
    
    eqtl.stat <- group_by(all.eqtl.table, stage, qtl_type) %>%
                  summarise(median_sig = round(median(qtl_significance), 2),
                            #sd_sig = sd(qtl_significance),
                            median_eff = round(median(abs(qtl_effect)), 2),
                            #sd_eff = sd(qtl_effect),
                            median_r2 = round(median(qtl_R2_sm), 2)) %>%
                            #sd_r2 = sd(qtl_R2_sm),) %>%
                  mutate(pval_sig = NA, pval_eff = NA, pval_r2 = NA) %>%
                  as.data.frame()
    
    for (i in 1:nrow(eqtl.stat)) {
      stage.row <- which(all.eqtl.table$stage == eqtl.stat[i, 1])
      eqtl.stat[i, 6] <- wilcox.test(qtl_significance ~ qtl_type, data = all.eqtl.table[stage.row, ])$p.value
      eqtl.stat[i, 7] <- wilcox.test(abs(qtl_effect) ~ qtl_type, data = all.eqtl.table[stage.row, ])$p.value
      eqtl.stat[i, 8] <- wilcox.test(qtl_R2_sm ~ qtl_type, data = all.eqtl.table[stage.row, ])$p.value
    }
    
# calculating shared local and distant eQTL ####
    
    qtl.table.summary <- data.frame(matrix(ncol = 18, nrow = 0))
    window.nu <- 4e6
    maxsize <- 100e6
    chr.num <- 5
  
    all.eqtl.table2 <- mutate(all.eqtl.table, interval = findInterval(qtl_bp, seq(1, maxsize, by = window.nu))) 
    all.eqtl.table2 <- subset(x = all.eqtl.table2, select = c(trait, qtl_chromosome, interval, qtl_type, 
                                                              stage))
    
    local.table <- all.eqtl.table2[all.eqtl.table2$qtl_type == 'local', ] %>%
      count(trait, qtl_chromosome, interval, qtl_type, stage) %>%
      spread(stage, n) %>%
      dplyr::select(-qtl_type) %>%
      arrange(trait)
    duplicated.local <- duplicated(local.table$trait)
    
    for (i in 1:nrow(local.table)) {
      if (i == 1) {
        next
      }
      if (duplicated.local[i] && 
          local.table$qtl_chromosome[i] == local.table$qtl_chromosome[i-1] &&
          abs(local.table$interval[i] - local.table$interval[i-1]) == 1 &&
          local.table$trait[i] == local.table$trait[i-1]) {
        print(i)
        table.tmp <- rbind(local.table[i, ], local.table[i-1, ])
        table.tmp <- aggregate(table.tmp[4:7], by = list(trait = table.tmp$trait), sum, na.rm = TRUE)
        table.tmp$qtl_chromosome <- local.table$qtl_chromosome[i]
        table.tmp$interval <- local.table$interval[i]
        table.tmp <- table.tmp[, c(colnames(local.table))]
        local.table[(i-1), 1] <- 'removed'
        local.table[i, ] <- table.tmp
      }
    }
    
    local.table <- local.table[which(local.table$trait != 'removed'), ]
    local.table[is.na(local.table)] <- 0
    write.csv(local.table, 'files/shared-local.csv')
    local.table <- local.table[, 4:7]
    local.table <- apply(local.table, 2, as.integer)
    
    distant.table <- all.eqtl.table2[all.eqtl.table2$qtl_type == 'distant', ] %>%
      count(trait, qtl_chromosome, interval, qtl_type, stage) %>%
      spread(stage, n) %>%
      dplyr::select(-qtl_type) %>%
      arrange(trait)
    duplicated.distant <- duplicated(distant.table$trait)
    
    for (i in 1:nrow(distant.table)) {
      if (i == 1) {
        next
      }
      if (duplicated.local[i] && 
          distant.table$qtl_chromosome[i] == distant.table$qtl_chromosome[i-1] &&
          abs(distant.table$interval[i] - distant.table$interval[i-1]) == 1 &&
          distant.table$trait[i] == distant.table$trait[i-1]) {
        print(i)
        table.tmp <- rbind(distant.table[i, ], distant.table[i-1, ])
        table.tmp <- aggregate(table.tmp[4:7], by = list(trait = table.tmp$trait), sum, na.rm = TRUE)
        table.tmp$qtl_chromosome <- distant.table$qtl_chromosome[i]
        table.tmp$interval <- distant.table$interval[i]
        table.tmp <- table.tmp[, c(colnames(distant.table))]
        distant.table[(i-1), 1] <- 'removed'
        distant.table[i, ] <- table.tmp
      }
    }
    
    distant.table <- distant.table[which(distant.table$trait != 'removed'), ]
    distant.table[is.na(distant.table)] <- 0
    write.csv(distant.table, 'files/shared-distant.csv')
    distant.table <- distant.table[, 4:7]
    distant.table <- apply(distant.table, 2, as.integer)
    
    #subset(qtl.table.cis, qtl.table.cis[development.stage[1]] == T)
    
    # venn diagram ####
    
    draw.quad.venn(area1 = overlap(local.table, 'pd'), 
                   area3 = overlap(local.table, 'ar'), 
                   area4 = overlap(local.table, 'im'), 
                   area2 = overlap(local.table, 'rp'), 
                   n13 = overlap(local.table, c('pd', 'ar')), 
                   n14 = overlap(local.table, c('pd', 'im')), 
                   n12 = overlap(local.table, c('pd', 'rp')), 
                   n34 = overlap(local.table, c('ar', 'im')), 
                   n23 = overlap(local.table, c('ar', 'rp')), 
                   n24 = overlap(local.table, c('im', 'rp')), 
                   n134 = overlap(local.table, c('pd', 'ar', 'im')), 
                   n123 = overlap(local.table, c('pd', 'ar', 'rp')), 
                   n124 = overlap(local.table, c('pd', 'im', 'rp')), 
                   n234 = overlap(local.table, c('ar', 'im', 'rp')), 
                   n1234 = overlap(local.table, c('pd', 'ar', 'im', 'rp')), 
                   category = c('primary\ndormant', 'radicle\nprotrusion', 'after-\nripenned', '6 hours after\nimbibition'),
                   lty = 'blank', 
                   fill = c('#4daf4a', '#e41a1c', '#377eb8', '#984ea3'),
                   alpha = 0.5)
    
    draw.quad.venn(area1 = overlap(distant.table, 'pd'), 
                   area3 = overlap(distant.table, 'ar'), 
                   area4 = overlap(distant.table, 'im'), 
                   area2 = overlap(distant.table, 'rp'), 
                   n13 = overlap(distant.table, c('pd', 'ar')), 
                   n14 = overlap(distant.table, c('pd', 'im')), 
                   n12 = overlap(distant.table, c('pd', 'rp')), 
                   n34 = overlap(distant.table, c('ar', 'im')), 
                   n23 = overlap(distant.table, c('ar', 'rp')), 
                   n24 = overlap(distant.table, c('im', 'rp')), 
                   n134 = overlap(distant.table, c('pd', 'ar', 'im')), 
                   n123 = overlap(distant.table, c('pd', 'ar', 'rp')), 
                   n124 = overlap(distant.table, c('pd', 'im', 'rp')), 
                   n234 = overlap(distant.table, c('ar', 'im', 'rp')), 
                   n1234 = overlap(distant.table, c('pd', 'ar', 'im', 'rp')), 
                   category = c('primary\ndormant', 'radicle\nprotrusion', 'after-\nripenned', '6 hours after\nimbibition'),
                   lty = 'blank', 
                   fill = c('#4daf4a', '#e41a1c', '#377eb8', '#984ea3'),
                   alpha = 0.5, )
    
    pdf(file = 'figures/distant-local-venn-diagram.pdf', width = 10, height = 5)
    grid.arrange(local.plot, distant.plot, ncol = 2, heights = c(5, 5))
    dev.off()
    
    # upset graph ####
    
    upset.local <- upset(data = as.data.frame(local.table), 
                         mainbar.y.max = 750, 
                        sets = c('rp', 'im', 'ar', 'pd'),
                        empty.intersections = 'on',
                        mainbar.y.label = 'shared eQTL',
                        sets.x.label = 'eQTL per stage', 
                        text.scale = 0.8, 
                        point.size = 1, 
                        line.size = 1,
                        keep.order = T,
                        set_size.scale_max = 1400)
    
    tiff(file = paste0("figures/upset-local.tiff"), 
         width = 1000, 
         height = 850, 
         units = 'px',
         res = 300,
         compression = 'lzw')
    upset.local
    dev.off()
    
    upset.distant <- upset(data = as.data.frame(distant.table), 
          mainbar.y.max = 750,
          sets = c('rp', 'im', 'ar', 'pd'),
          empty.intersections = 'on',
          mainbar.y.label = 'shared eQTL',
          sets.x.label = 'eQTL per stage', 
          text.scale = 0.8, 
          point.size = 1, 
          line.size = 1,
          keep.order = T, 
          set_size.scale_max = 1400)

    tiff(file = paste0("figures/upset-distant.tiff"), 
Hartanto, Margi's avatar
Hartanto, Margi committed
         height = 850, 
         units = 'px',
         res = 300,
         compression = 'lzw')
Hartanto, Margi's avatar
Hartanto, Margi committed
    dev.off()
    
# cis-trans plot - single stage #####
    
    all.eqtl.table3 <- all.eqtl.table %>%
      mutate(stage = replace(stage, qtl_type == 'local', 'local\neQTL'))
    #all.eqtl.table3$stage <- ordered(all.eqtl.table$stage, seed.stage)
    
    all.eqtl.table3$qtl_type <- factor(all.eqtl.table3$qtl_type)
    all.eqtl.table3 <- all.eqtl.table3 %>%
      mutate(allele = ifelse(sign(qtl_effect) <= 0, 'sha',
                             ifelse(sign(qtl_effect) >=0, 'bay', 'null')))
    
    eqtl.plot <- ggplot(all.eqtl.table3, aes(x = qtl_bp, y = gene_bp)) + 
      geom_segment(aes(x = qtl_bp_left, y = gene_bp, xend = qtl_bp_right, yend = gene_bp),
                   alpha = 0.75, colour = "grey") +
      geom_point(aes(colour = ordered(stage, levels = c("local\neQTL", "pd", "ar", "im", "rp")), 
                     shape = allele,
                     size = abs(qtl_effect),
                     alpha = log10(qtl_significance))
                 ) +
      scale_alpha(range = c(0.3, 1)) +
      facet_grid(fct_rev(gene_chromosome) ~ qtl_chromosome, space = "free", scales = "free") +
      presentation +
      scale_colour_manual('stage', values = c('black','#ccbb44', '#228833', '#4477aa', '#cc3311')) +
      labs(x = "eQTL peak position (Mb)", y = "Gene position (Mb)") +
      scale_x_continuous(breaks=c(5, 10, 15, 20, 25, 30, 35, 40)*10^6,labels=c(5, 10, 15, 20, 25, 30, 35, 40)) +
      scale_y_continuous(breaks=c(5, 10, 15, 20, 25, 30, 35, 40)*10^6,labels=c(5, 10, 15, 20, 25, 30, 35, 40))
    
    tiff(file = paste0("figures/eqtl-plot.tiff"), 
         width = 2250, 
         height = 1600, 
         units = 'px',
         res = 300,
         compression = 'lzw')
    eqtl.plot
    dev.off()
    
# histogram - single stage ####
     
    hist.threshold <- data.frame(stage =  seed.stage, exp = c(7.45098, 5.464286, 7, 6.470588), threshold = rep(NA, 4))
    for (i in 1:nrow(hist.threshold)) {
      hist.threshold$threshold[i] <- qpois(p = 0.0001, lambda = hist.threshold$exp[i], lower.tail = F)
    } # create horizontal line as a threshold
    
    
    eqtl.hist <- ggplot(all.eqtl.table3 %>% filter(qtl_type != 'local'), 
                        aes(x=qtl_bp, fill=ordered(stage, levels = c("pd", "ar", "im", "rp")))) + # 750 x 400
      geom_histogram(binwidth = 2000000, right = T, origin = 0, alpha = 1) +
      facet_grid(factor(stage, levels =  c("pd", "ar", "im", "rp")) ~ qtl_chromosome, space = "free",scales="free") +
      presentation +
      scale_fill_manual('stage', values = c('#ccbb44', '#228833', '#4477aa', '#cc3311')) +
      theme(legend.position = "right", legend.text=element_text(size=10)) +
      labs(x="eQTL peak position (Mb)",y="eQTL counts") +
      ylim(c(0, 100)) +
      geom_hline(data = hist.threshold, aes(yintercept = threshold), 
                 linetype = 'dashed', 
                 color = 'red', 
                 size = .1) +
      scale_x_continuous(breaks=c(5, 10, 15, 20, 25, 30, 35, 40)*10^6,labels=c(5, 10, 15, 20, 25, 30, 35, 40))
    
    tiff(file = paste0("figures/eqtl-hist.tiff"), 
         width = 2250, 
         height = 1200, 
         units = 'px',
         res = 300,
         compression = 'lzw')
    eqtl.hist
    dev.off()
    
# compare the R2, effect, andsignificance of local and distant eQTL ####
    r2.plot <- ggplot(all.eqtl.table, aes(x = qtl_R2_sm,
                                             fill = factor(qtl_type, levels = c('local', 'distant')),
                                             color = factor(qtl_type, levels = c('local', 'distant')))) +
      #geom_histogram(aes(y = ..density..), binwidth = 0.001, position = 'identity', alpha = 0.3) +
      geom_density(alpha = 0.5, size = 0) +
      facet_wrap( ~ factor(stage, levels = c('pd', 'ar', 'im', 'rp')), ncol = 2) +
      geom_vline(data = group_by(all.eqtl.table, stage, qtl_type) %>%
                   summarise(median = median(qtl_R2_sm)), aes(xintercept = median, 
                                                              color = factor(qtl_type, levels = c('local', 'distant'))),
                 linetype = 'dashed', size = .5) +
      presentation +
      scale_color_brewer(palette = 'Set1') +
      scale_fill_brewer(palette = 'Set1') +
      xlab('explained phenotypic variance (R2)') +
      ylab('density') +
      labs(fill = 'eQTL type', color = 'eQTL type') +
      #ylim(0, 1) + 
      xlim(0, 1) +
      theme(strip.text.x = element_text(size = 12))
    
    tiff(file = paste0("figures/r2-plot.tiff"), 
         width = 2250, 
         height = 1200, 
         units = 'px',
         res = 300,
         compression = 'lzw')
    r2.plot
    dev.off()
      
    effect.plot <- ggplot(all.eqtl.table, aes(x = log(abs(qtl_effect)),
                               fill = factor(qtl_type, levels = c('local', 'distant')),
                               color = factor(qtl_type, levels = c('local', 'distant')))) +
      #geom_histogram(aes(y = ..density..), binwidth = 0.001, position = 'identity', alpha = 0.3) +
      geom_density(alpha = 0.5, size = 0) +
      facet_wrap( ~ factor(stage, levels = c('pd', 'ar', 'im', 'rp')), ncol = 2) +
      geom_vline(data = group_by(all.eqtl.table, stage, qtl_type) %>%
                   summarise(median = median(log(abs(qtl_effect)))), aes(xintercept = median, 
                                                              color = factor(qtl_type, levels = c('local', 'distant'))),
                 linetype = 'dashed', size = .5) +
      presentation +
      scale_color_brewer(palette = 'Set1') +
      scale_fill_brewer(palette = 'Set1') +
      xlab('absolute eQTL effect') +
      ylab('density') +
      labs(fill = 'eQTL type', color = 'eQTL type') +
      #ylim(0, 1) + 
      #xlim(0, 1.2) +
      theme(strip.text.x = element_text(size = 12))
    
    tiff(file = paste0("figures/eff-plot.tiff"), 
         width = 2250, 
         height = 1200, 
         units = 'px',
         res = 300,
         compression = 'lzw')
    effect.plot
    dev.off()
    

    sig.plot <- ggplot(all.eqtl.table, aes(x = qtl_significance,
                               fill = factor(qtl_type, levels = c('local', 'distant')),
                               color = factor(qtl_type, levels = c('local', 'distant')))) +
      #geom_histogram(aes(y = ..density..), binwidth = 0.001, position = 'identity', alpha = 0.3) +
      geom_density(alpha = 0.5, size = 0) +
      facet_wrap( ~ factor(stage, levels = c('pd', 'ar', 'im', 'rp')), ncol = 2) +
      geom_vline(data = group_by(all.eqtl.table, stage, qtl_type) %>%
                   summarise(median = median(qtl_significance)), aes(xintercept = median, 
                                                               color = factor(qtl_type, levels = c('local', 'distant'))),
                 linetype = 'dashed', size = .5) +
      presentation +
      scale_color_brewer(palette = 'Set1') +
      scale_fill_brewer(palette = 'Set1') +
      xlab('-log10(p)') +
      ylab('density') +
      labs(fill = 'eQTL type', color = 'eQTL type') +
      #ylim(0, 1) + 
      xlim(0, 36) +
      theme(strip.text.x = element_text(size = 12))
    
    tiff(file = paste0("figures/sig-plot.tiff"), 
         width = 2250, 
         height = 1200, 
         units = 'px',
         res = 300,
         compression = 'lzw')
    sig.plot
    dev.off()
    
    # ggplot(all.eqtl.table, aes(x = h2_REML, y = qtl_R2_sm,
    #                                             fill = factor(qtl_type, levels = c('local', 'distant')),
    #                                             color = factor(qtl_type, levels = c('local', 'distant')))) +
    #   geom_point(size = 1, alpha = 0.5) +
    #   facet_wrap( ~ factor(stage, levels = c('pd', 'ar', 'im', 'rp')), ncol = 2) +
    #   geom_vline(data = group_by(all.eqtl.table, stage, qtl_type) %>%
    #                summarise(med = median(h2_REML, na.rm = T)), aes(xintercept = med, 
    #                                                                 color = factor(qtl_type, levels = c('local', 'distant'))),
    #              linetype = 'dashed', size = .5) +
    #   geom_hline(data = group_by(all.eqtl.table, stage, qtl_type) %>%
    #                summarise(med = median(qtl_R2_sm, na.rm = T)), aes(yintercept = med, 
    #                                                                   color = factor(qtl_type, levels = c('local', 'distant'))),
    #              linetype = 'dashed', size = .5) +
    #   geom_smooth(model = lm) +
    #   geom_abline(aes(slope = 1, intercept = 0), size = .75, linetype = 'dashed') +
    #   presentation +
    #   scale_color_brewer(palette = 'Set1') +
    #   scale_fill_brewer(palette = 'Set1') +
    #   xlab('narrow-sense heritabllity (h2)') +
    #   ylab('explained phenotypic variance (R2)') +
    #   labs(fill = 'eQTL type', color = 'eQTL type') +
    #   ylim(0, 1) + 
    #   xlim(0, 1) + 
    #   theme(strip.text.x = element_text(size = 8))