From 0fe748dab5706d2f906ed5d218f1adf8ae343059 Mon Sep 17 00:00:00 2001 From: "Hartanto, Margi" <margi.hartanto@wur.nl> Date: Mon, 17 Feb 2020 18:19:56 +0100 Subject: [PATCH] repair Dist function, change heatmap colors, and fix upset plot --- 1-clustering.R | 64 ++++++++++++++++++++++++++++++++++++------ 3-eqtl-visualization.R | 4 +-- 2 files changed, 58 insertions(+), 10 deletions(-) diff --git a/1-clustering.R b/1-clustering.R index 8ed33ea..ab8a030 100644 --- a/1-clustering.R +++ b/1-clustering.R @@ -24,6 +24,7 @@ library(amap) library(topGO) library(org.At.tair.db) + library(colorspace) # unused libraries #### # library(Mfuzz) # library(factoextra) @@ -39,6 +40,7 @@ trait.matrix <- read.csv(file = 'files/trait-matrix.csv', row.names = 1) sample.list <- read.csv(file = 'files/sample-list.csv') sample.stage <- sample.list$stage + stages <- c('pd', 'ar', 'im', 'rp') # data presentation #### presentation <- theme(axis.text.x = element_text(size=6, face="bold", color="black"), @@ -70,12 +72,15 @@ pc2 = pr.out$x[, 2], stage = sample.stage, population = c(rep('parent', 16), rep('RIL', 164))) + pc.sum <- summary(pr.out) + pc1.var <- round(pc.sum$importance[2, 'PC1'] * 100, 2) + pc2.var <- round(pc.sum$importance[2, 'PC2'] * 100, 2) pca.all <- ggplot(pc.df, aes(x = pc1, y = pc2, color = factor(stage, level = c('pd', 'ar', 'im', 'rp')))) + geom_point(aes(shape = population)) + scale_colour_manual(values = c('#ccbb44', '#228833', '#4477aa', '#cc3311')) + scale_shape_manual(values = c(17, 19)) + - labs(x = "PC1 (55.56%)", y = "PC2 (14.82%)") + + labs(x = paste0("PC1 (", pc1.var, "%)"), y = paste0("PC2 (", pc2.var, "%)")) + labs(colour = 'stage') + theme(text = element_text(size = 10), panel.background = element_blank(), @@ -99,7 +104,51 @@ pca.all dev.off() -# differential expresed gene analysis and hierarchiecal clustering #### +# PCA per stage #### + for (i in stages) { + + + trait.stage <- trait.matrix[, which(sample.stage == i)] + pr.stage <- prcomp(x = (t(trait.stage)), center = T, scale. = F) + pc.stage <- data.frame(pc1 = pr.stage$x[, 1], + pc2 = pr.stage$x[, 2], + population = c(rep('parent', 4), rep('RIL', ncol(trait.stage) - 4))) + pc.sum <- summary(pr.stage) + pc1.var <- round(pc.sum$importance[2, 'PC1'] * 100, 2) + pc2.var <- round(pc.sum$importance[2, 'PC2'] * 100, 2) + pca.stage.plot <- ggplot(pc.stage, aes(x = pc1, y = pc2)) + + geom_point(aes(shape = population)) + + #scale_colour_manual(values = c('#ccbb44', '#228833', '#4477aa', '#cc3311')) + + scale_shape_manual(values = c(17, 19)) + + labs(x = paste0("PC1 (", pc1.var, "%)"), y = paste0("PC2 (", pc2.var, "%)")) + + labs(colour = 'stage') + + theme(text = element_text(size = 10), + panel.background = element_blank(), + panel.border=element_rect(fill=NA), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + strip.background=element_blank(), + axis.text.x=element_text(colour="black"), + axis.text.y=element_text(colour="black"), + axis.ticks=element_line(colour="black"), + plot.margin=unit(c(1,1,1,1),"line")) + + #presentation + + geom_hline(aes(yintercept = 0), linetype = 'dashed', size = .1) + + geom_vline(aes(xintercept = 0), linetype = 'dashed', size = .1) + + tiff(file = paste0("figures/pca-", i, ".tiff"), + width = 2250, + height = 1200, + units = 'px', + res = 300, + compression = 'lzw') + print(pca.stage.plot) + dev.off() + } + + + + # differential expresed gene analysis and hierarchiecal clustering #### group <- with (pData(eset), stage) group <- factor(group) design <- model.matrix(~ 0 + group) @@ -142,7 +191,7 @@ # hierarchiecal tree clustering done for stage and genes #### # hc for stage - dist.matrix.stage <- Dist(t(cluster.data), + dist.matrix.stage <- amap::Dist(t(cluster.data), method = 'pearson', upper = T, diag = T) @@ -152,7 +201,7 @@ plot(reorder(x = as.dendrogram(hc.stage), 180:1, agglo.FUN = mean)) # hc for gene - dist.matrix.gene <- Dist(cluster.data, + dist.matrix.gene <- amap::Dist(cluster.data, method = 'pearson', # to group the genes based on patterns similarity across stages upper = T, diag = T) @@ -183,17 +232,16 @@ res = 300, compression = 'lzw') heatmap.2(x = as.matrix(cluster.data), cexRow = 0.8, cexCol = 1.2, - distfun = function(x) Dist(x, method = 'pearson'), + distfun = function(x) amap::Dist(x, method = 'pearson'), hclust = function(x) hclust(x, method = 'ward.D2'), scale = 'row', density.info = "density", trace = 'none', - col = brewer.pal(9, 'YlGnBu'), + col = diverging_hcl(100, palette = 'Blue-Red3'), keysize = 1, key.title = 'Z-score of\nlog-intensities', key.xlab = NA, - Colv = reorder(x = as.dendrogram(hc.stage), 180:1, agglo.FUN = mean), - ColSideColors = sample.color) + Colv = reorder(x = as.dendrogram(hc.stage), 180:1, agglo.FUN = mean)) dev.off() # GOE analysis for genes in each cluster #### diff --git a/3-eqtl-visualization.R b/3-eqtl-visualization.R index 3760182..c5184e3 100644 --- a/3-eqtl-visualization.R +++ b/3-eqtl-visualization.R @@ -277,12 +277,12 @@ set_size.scale_max = 1400) tiff(file = paste0("figures/upset-distant.tiff"), - width = 2250, + width = 1000, height = 850, units = 'px', res = 300, compression = 'lzw') - grid.arrange(upset.local, upset.distant, ncol = 2) + upset.distant dev.off() # cis-trans plot - single stage ##### -- GitLab