From 0fe748dab5706d2f906ed5d218f1adf8ae343059 Mon Sep 17 00:00:00 2001
From: "Hartanto, Margi" <margi.hartanto@wur.nl>
Date: Mon, 17 Feb 2020 18:19:56 +0100
Subject: [PATCH] repair Dist function, change heatmap colors, and fix upset
 plot

---
 1-clustering.R         | 64 ++++++++++++++++++++++++++++++++++++------
 3-eqtl-visualization.R |  4 +--
 2 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/1-clustering.R b/1-clustering.R
index 8ed33ea..ab8a030 100644
--- a/1-clustering.R
+++ b/1-clustering.R
@@ -24,6 +24,7 @@
   library(amap)
   library(topGO)
   library(org.At.tair.db)
+  library(colorspace)
   # unused libraries ####
   # library(Mfuzz)
   # library(factoextra)
@@ -39,6 +40,7 @@
   trait.matrix <- read.csv(file = 'files/trait-matrix.csv', row.names = 1)
   sample.list <- read.csv(file = 'files/sample-list.csv')
   sample.stage <- sample.list$stage
+  stages <- c('pd', 'ar', 'im', 'rp')
   
 # data presentation ####
   presentation <- theme(axis.text.x = element_text(size=6, face="bold", color="black"),
@@ -70,12 +72,15 @@
                           pc2 = pr.out$x[, 2], 
                           stage = sample.stage, 
                           population = c(rep('parent', 16), rep('RIL', 164)))
+  pc.sum <- summary(pr.out)
+  pc1.var <- round(pc.sum$importance[2, 'PC1'] * 100, 2)
+  pc2.var <- round(pc.sum$importance[2, 'PC2'] * 100, 2)
   
   pca.all <- ggplot(pc.df, aes(x = pc1, y = pc2, color = factor(stage, level = c('pd', 'ar', 'im', 'rp')))) + 
                             geom_point(aes(shape = population)) +
                             scale_colour_manual(values = c('#ccbb44', '#228833', '#4477aa', '#cc3311')) +
                             scale_shape_manual(values = c(17, 19)) +
-                            labs(x = "PC1 (55.56%)", y = "PC2 (14.82%)") +
+                            labs(x = paste0("PC1 (", pc1.var, "%)"), y = paste0("PC2 (", pc2.var, "%)")) +
                             labs(colour = 'stage') +
                             theme(text = element_text(size = 10),
                                   panel.background = element_blank(),
@@ -99,7 +104,51 @@
   pca.all
   dev.off()  
   
-# differential expresed gene analysis and hierarchiecal clustering ####
+# PCA per stage ####
+  for (i in stages) {
+
+    
+    trait.stage <- trait.matrix[, which(sample.stage == i)]
+    pr.stage <- prcomp(x = (t(trait.stage)),  center = T, scale. = F)
+    pc.stage <- data.frame(pc1 = pr.stage$x[, 1], 
+                        pc2 = pr.stage$x[, 2], 
+                        population = c(rep('parent', 4), rep('RIL', ncol(trait.stage) - 4)))
+    pc.sum <- summary(pr.stage)
+    pc1.var <- round(pc.sum$importance[2, 'PC1'] * 100, 2)
+    pc2.var <- round(pc.sum$importance[2, 'PC2'] * 100, 2)
+    pca.stage.plot <- ggplot(pc.stage, aes(x = pc1, y = pc2)) + 
+      geom_point(aes(shape = population)) +
+      #scale_colour_manual(values = c('#ccbb44', '#228833', '#4477aa', '#cc3311')) +
+      scale_shape_manual(values = c(17, 19)) +
+      labs(x = paste0("PC1 (", pc1.var, "%)"), y = paste0("PC2 (", pc2.var, "%)")) +
+      labs(colour = 'stage') +
+      theme(text = element_text(size = 10),
+            panel.background = element_blank(),
+            panel.border=element_rect(fill=NA),
+            panel.grid.major = element_blank(),
+            panel.grid.minor = element_blank(),
+            strip.background=element_blank(),
+            axis.text.x=element_text(colour="black"),
+            axis.text.y=element_text(colour="black"),
+            axis.ticks=element_line(colour="black"),
+            plot.margin=unit(c(1,1,1,1),"line")) +
+      #presentation +
+      geom_hline(aes(yintercept = 0), linetype = 'dashed', size = .1) +
+      geom_vline(aes(xintercept = 0), linetype = 'dashed', size = .1) 
+
+    tiff(file = paste0("figures/pca-", i, ".tiff"), 
+         width = 2250, 
+         height = 1200, 
+         units = 'px',
+         res = 300,
+         compression = 'lzw')
+    print(pca.stage.plot)
+    dev.off()  
+  }
+  
+  
+
+  # differential expresed gene analysis and hierarchiecal clustering ####
   group <- with (pData(eset), stage)
   group <- factor(group)
   design <- model.matrix(~ 0 + group)
@@ -142,7 +191,7 @@
 # hierarchiecal tree clustering done for stage and genes ####
   
   # hc for stage
-  dist.matrix.stage <- Dist(t(cluster.data), 
+  dist.matrix.stage <- amap::Dist(t(cluster.data), 
                             method = 'pearson', 
                             upper = T, 
                             diag = T) 
@@ -152,7 +201,7 @@
   plot(reorder(x = as.dendrogram(hc.stage), 180:1, agglo.FUN = mean))
   
   # hc for gene
-  dist.matrix.gene <- Dist(cluster.data, 
+  dist.matrix.gene <- amap::Dist(cluster.data, 
                            method = 'pearson', # to group the genes based on patterns similarity across stages
                            upper = T, 
                            diag = T)  
@@ -183,17 +232,16 @@
        res = 300,
        compression = 'lzw')
   heatmap.2(x = as.matrix(cluster.data), cexRow = 0.8, cexCol = 1.2,
-                  distfun = function(x) Dist(x, method = 'pearson'),
+                  distfun = function(x) amap::Dist(x, method = 'pearson'),
                   hclust = function(x) hclust(x, method = 'ward.D2'),
                   scale = 'row', 
                   density.info = "density", 
                   trace = 'none', 
-                  col = brewer.pal(9, 'YlGnBu'),
+                  col = diverging_hcl(100, palette = 'Blue-Red3'),
                   keysize = 1,
                   key.title = 'Z-score of\nlog-intensities',
                   key.xlab = NA, 
-                  Colv = reorder(x = as.dendrogram(hc.stage), 180:1, agglo.FUN = mean), 
-                  ColSideColors = sample.color)
+                  Colv = reorder(x = as.dendrogram(hc.stage), 180:1, agglo.FUN = mean))
   dev.off() 
  
 # GOE analysis for genes in each cluster ####
diff --git a/3-eqtl-visualization.R b/3-eqtl-visualization.R
index 3760182..c5184e3 100644
--- a/3-eqtl-visualization.R
+++ b/3-eqtl-visualization.R
@@ -277,12 +277,12 @@
           set_size.scale_max = 1400)
 
     tiff(file = paste0("figures/upset-distant.tiff"), 
-         width = 2250, 
+         width = 1000, 
          height = 850, 
          units = 'px',
          res = 300,
          compression = 'lzw')
-    grid.arrange(upset.local, upset.distant, ncol = 2)
+    upset.distant
     dev.off()
     
 # cis-trans plot - single stage #####
-- 
GitLab