From 42e3804c74db818d1e2477e1a5b30b8f07a5c782 Mon Sep 17 00:00:00 2001
From: faroo002 <muhammad.farooq@wur.nl>
Date: Sat, 7 Nov 2020 10:54:26 +0100
Subject: [PATCH] uploaded

---
 code/make_coex_features_all.R |  49 +++---------
 code/make_go_features_all.R   | 135 ++++++++++++++++++++++++++++++++++
 2 files changed, 146 insertions(+), 38 deletions(-)
 create mode 100644 code/make_go_features_all.R

diff --git a/code/make_coex_features_all.R b/code/make_coex_features_all.R
index c49179f..4e04ad3 100644
--- a/code/make_coex_features_all.R
+++ b/code/make_coex_features_all.R
@@ -8,27 +8,21 @@ is.installed <- function(mypkg)
     is.element(mypkg, installed.packages()[,1])
 } 
 ########################################################################################
-setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/coexpression")
+setwd("../data/priors/coexpression")
 library('dplyr')
 library('qgg')
 ########################################################################################
-MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
-accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
-all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
-ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
-MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
-W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
-all_coexpression_clusters <-as.data.frame(read.table(file ="/mnt/LTR_userdata/faroo002/arabidopsis/GP/GBLUP_vs_GFBLUP/based_on_coexpression/ath_gene_clusters.txt", header=TRUE, sep="",strip.white=TRUE))
-all_coexpression_clusters <-as.data.frame(read.table(file ="/mnt/LTR_userdata/faroo002/arabidopsis/GP/GBLUP_vs_GFBLUP/based_on_coexpression/ath_gene_clusters.txt", header=TRUE, sep="",strip.white=TRUE))
-coex_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/coexpression/coex_markers_number.rds")	#7432
-coex_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/coexpression/coex_markers.rds")			#7432
-markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/coexpression/markerSets.rds")				
+MAC_matrix_with_header=readRDS(file="../../MAC_matrix_with_header.rds")
+accessions=readRDS(file="../../accessions.rds")
+all_nucl_genes_bed=readRDS(file="../../all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="../../ath_all_new_maf_ldpruned_map.rds")
+MAC_df=readRDS(file="../../MAC_df.rds")
+W=readRDS(file="../../W.rds")
+all_coexpression_clusters <-as.data.frame(read.table(file ="ath_gene_clusters.txt", header=TRUE, sep="",strip.white=TRUE))
+coex_markers_number=readRDS(file="coex_markers_number.rds")	#7432
+coex_markers=readRDS(file="coex_markers.rds")			#7432
+markerSets=readRDS(file="markerSets.rds")				
 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-regmap_W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/regmap_W.rds") #This is based on only regmap dataset using call_method_75_TAIR9_maf001_ldpruned
-regmap_G=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/regmap_G.rds")
-#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-psii_filtered_coex_df_unique=read.table(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/psii_filtered_coex_df_unique.tbl",header=T,sep="\t")
-psii_filtered_clusterids=as.character(psii_filtered_coex_df_unique$clusterid)
 ###########################################################################################
 message("Retrieving markers for each feature")
 if(!exists("coex_markers"))
@@ -100,24 +94,3 @@ saveRDS(rGF,"./rGF.rds")
 }
  
 ########################################################################################
-#Making GRM from only Regmap dataset without flipping
-message("Making Genomic Relationship Matrices...")
-	markerSets=markerSets[which(names(markerSets) %in% psii_filtered_clusterids)]
-	regmap_setsGF <- lapply(markerSets,function(x){ x[x %in% colnames(regmap_W)] })       
-	regmap_rsetsGF <- lapply(markerSets,function(x){ colnames(regmap_W)[colnames(regmap_W) %not_in% x] }) #remaining markers for each feature GO term      
-
-	regmap_nsets <- length(regmap_setsGF)
-	regmap_nsnps <- sapply(regmap_setsGF,length)
-
-	saveRDS(regmap_setsGF,"regmap_setsGF.rds")
-	saveRDS(regmap_rsetsGF,"regmap_rsetsGF.rds")
-	saveRDS(regmap_nsets,"regmap_nsets.rds")
-	saveRDS(regmap_nsnps,"regmap_nsnps.rds")
-  
-	#Make Genomic Relationship Matrix for each feature and its remaining markers.
-	regmap_GF <- lapply(regmap_setsGF, function(x) {grm(W = regmap_W[, x])}) 
-	saveRDS(regmap_GF,"regmap_GF.rds")
-
-	regmap_rGF <- lapply(regmap_rsetsGF, function(x) {grm(W = regmap_W[, x])}) 
-	saveRDS(regmap_rGF,"regmap_rGF.rds")
-########################################################################################
diff --git a/code/make_go_features_all.R b/code/make_go_features_all.R
new file mode 100644
index 0000000..e705738
--- /dev/null
+++ b/code/make_go_features_all.R
@@ -0,0 +1,135 @@
+message("start of get_go_features.R...")
+#############################  FUNCTIONS  ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+    is.element(mypkg, installed.packages()[,1])
+} 
+########################################################################################
+setwd("../data/priors/go/")
+#if (!is.installed("backports"))
+#{
+#  install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+#  install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+#  options(devtools.install.args=" --no-multiargs")
+#  devtools::install_github("psoerensen/qgg")
+#}
+library('qgg')
+library('dplyr')
+library(GO.db)
+library(org.At.tair.db)
+########################################################################################
+MAC_matrix_with_header=readRDS(file="../../MAC_matrix_with_header.rds")
+accessions=readRDS(file="../../accessions.rds")
+all_nucl_genes_bed=readRDS(file="../../all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="../../ath_all_new_maf_ldpruned_map.rds")
+Pheno=readRDS(file="../../Pheno.rds")
+geno_pheno=readRDS(file="../../geno_pheno.rds")
+MAC_df=readRDS(file="../../MAC_df.rds")
+pheno_df=readRDS(file="../../pheno_df.rds")
+W=readRDS(file="../../W.rds")
+
+go_all_genes_number=readRDS(file="go_all_genes_number.rds")		#7432
+go_all_markers_number=readRDS(file="go_all_markers_number.rds")	#7432
+go_all_markers=readRDS(file="go_all_markers.rds")			#7432
+markerSets=readRDS(file="markerSets.rds")				#7297
+setsGF=readRDS(file="setsGF.rds")					#7297
+rsetsGF=readRDS(file="rsetsGF.rds")					#7297
+nsets=readRDS(file="nsets.rds")					#7297
+nsnps=readRDS(file="nsnps.rds")					#7297
+GF=readRDS(file="GF.rds")						#7297
+rGF=readRDS(file="rGF.rds")						#7297	
+###########################################################################################
+message("Retrieving markers for each GO")
+if(!exists("go_all_markers"))
+{
+  go2alltair <- as.list(org.At.tairGO2ALLTAIRS)
+  go_all_markers<-rep(list(list()),length(go2alltair))
+  names(go_all_markers)<-(names(go2alltair))
+  go_all_genes_number<-numeric(length=length(go_all_markers))
+
+  if(length(go2alltair) > 0)
+  {
+    for(i in c(1:length(go2alltair)))  #Loop through all Go terms
+    {
+	message(paste("GO#",i)) 
+      	goid=names(go2alltair[i])
+      	tair_ids=go2alltair[i]
+      	lst_of_genes=list()
+      	for(j in 1:length(tair_ids[[1]])) #Loop through all Tair IDs in a GO term
+      	{
+      		lst_of_genes=c(lst_of_genes,unlist(unname(tair_ids[[1]][j])))
+      	}
+      	unique_lst_of_tair_ids<- as.vector(unique(unlist(lst_of_genes)))
+        go_all_genes_number[i]<-length(unique_lst_of_tair_ids)
+      	mlst<-list()
+      	for(j in 1:length(unique_lst_of_tair_ids))
+      	{       
+      		#for each gene find its coordinates and then find all markers in it. then append these markers to corresponding GO feature
+      		chromosome=as.numeric(all_nucl_genes_bed[all_nucl_genes_bed["name"]==unique_lst_of_tair_ids[j]][1])
+      		startpos=as.numeric(all_nucl_genes_bed[all_nucl_genes_bed["name"]==unique_lst_of_tair_ids[j]][2])
+      		endpos=as.numeric(all_nucl_genes_bed[all_nucl_genes_bed["name"]==unique_lst_of_tair_ids[j]][3])
+      		markers=ath_all_new_maf_ldpruned_map %>% filter(chr==chromosome & position >= startpos & position<=endpos) %>% mutate_if(is.factor, as.character)
+      		#print(markers)
+      		mlst<-c(mlst,as.list(markers["marker_name"]))
+      		#print (mlst)
+        }
+        go_all_markers[i]<-list(unique(unlist(unname(mlst))))
+    }
+
+    go_all_markers_number<-numeric(length=length(go_all_markers))
+    for(i in c(1:length(go_all_markers)))
+    {
+      go_all_markers_number[i]=length(go_all_markers[[i]])
+    }
+    
+  }
+  saveRDS(go_all_genes_number,"go_all_genes_number.rds")
+  saveRDS(go_all_markers_number,"go_all_markers_number.rds")
+  saveRDS(go_all_markers,"go_all_markers.rds")
+}
+###########################################################################################
+#Make Genomic Relationship Matrix for each feature and its remaining markers
+message("Making Genomic Relationship Matrices...")
+if(!exists("rGF"))
+{
+  markerSets<-list()
+  for(i in c(1:length(go_all_markers)))
+  {
+    if(length(go_all_markers[[i]])>0)
+    {
+      markerSets=c(markerSets,go_all_markers[i])
+    }
+  }
+  setsGF <- lapply(markerSets,function(x){ x[x%in%colnames(W)] })       
+  rsetsGF <- lapply(markerSets,function(x){ colnames(W)[colnames(W) %not_in% x] }) #remaining markers for each feature GO term      
+
+  nsets <- length(setsGF)
+  nsnps <- sapply(setsGF,length)
+
+  saveRDS(markerSets,"markerSets.rds")
+  saveRDS(setsGF,"setsGF.rds")
+  saveRDS(rsetsGF,"rsetsGF.rds")
+  saveRDS(nsets,"nsets.rds")
+  saveRDS(nsnps,"nsnps.rds")
+  
+  #Make Genomic Relationship Matrix for each feature and its remaining markers.
+  GF <- lapply(setsGF, function(x) {grm(W = W[, x])}) 
+  saveRDS(GF,"GF.rds")
+
+  rGF <- lapply(rsetsGF, function(x) {grm(W = W[, x])}) 
+  saveRDS(rGF,"rGF.rds")
+
+}
+ 
+########################################################################################
+message("end of get_go_features.R...")
-- 
GitLab