From 42e3804c74db818d1e2477e1a5b30b8f07a5c782 Mon Sep 17 00:00:00 2001 From: faroo002 <muhammad.farooq@wur.nl> Date: Sat, 7 Nov 2020 10:54:26 +0100 Subject: [PATCH] uploaded --- code/make_coex_features_all.R | 49 +++--------- code/make_go_features_all.R | 135 ++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 38 deletions(-) create mode 100644 code/make_go_features_all.R diff --git a/code/make_coex_features_all.R b/code/make_coex_features_all.R index c49179f..4e04ad3 100644 --- a/code/make_coex_features_all.R +++ b/code/make_coex_features_all.R @@ -8,27 +8,21 @@ is.installed <- function(mypkg) is.element(mypkg, installed.packages()[,1]) } ######################################################################################## -setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/coexpression") +setwd("../data/priors/coexpression") library('dplyr') library('qgg') ######################################################################################## -MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") -accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") -all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") -ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") -MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") -W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") -all_coexpression_clusters <-as.data.frame(read.table(file ="/mnt/LTR_userdata/faroo002/arabidopsis/GP/GBLUP_vs_GFBLUP/based_on_coexpression/ath_gene_clusters.txt", header=TRUE, sep="",strip.white=TRUE)) -all_coexpression_clusters <-as.data.frame(read.table(file ="/mnt/LTR_userdata/faroo002/arabidopsis/GP/GBLUP_vs_GFBLUP/based_on_coexpression/ath_gene_clusters.txt", header=TRUE, sep="",strip.white=TRUE)) -coex_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/coexpression/coex_markers_number.rds") #7432 -coex_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/coexpression/coex_markers.rds") #7432 -markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/coexpression/markerSets.rds") +MAC_matrix_with_header=readRDS(file="../../MAC_matrix_with_header.rds") +accessions=readRDS(file="../../accessions.rds") +all_nucl_genes_bed=readRDS(file="../../all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="../../ath_all_new_maf_ldpruned_map.rds") +MAC_df=readRDS(file="../../MAC_df.rds") +W=readRDS(file="../../W.rds") +all_coexpression_clusters <-as.data.frame(read.table(file ="ath_gene_clusters.txt", header=TRUE, sep="",strip.white=TRUE)) +coex_markers_number=readRDS(file="coex_markers_number.rds") #7432 +coex_markers=readRDS(file="coex_markers.rds") #7432 +markerSets=readRDS(file="markerSets.rds") #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -regmap_W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/regmap_W.rds") #This is based on only regmap dataset using call_method_75_TAIR9_maf001_ldpruned -regmap_G=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/regmap_G.rds") -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -psii_filtered_coex_df_unique=read.table(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/psii_filtered_coex_df_unique.tbl",header=T,sep="\t") -psii_filtered_clusterids=as.character(psii_filtered_coex_df_unique$clusterid) ########################################################################################### message("Retrieving markers for each feature") if(!exists("coex_markers")) @@ -100,24 +94,3 @@ saveRDS(rGF,"./rGF.rds") } ######################################################################################## -#Making GRM from only Regmap dataset without flipping -message("Making Genomic Relationship Matrices...") - markerSets=markerSets[which(names(markerSets) %in% psii_filtered_clusterids)] - regmap_setsGF <- lapply(markerSets,function(x){ x[x %in% colnames(regmap_W)] }) - regmap_rsetsGF <- lapply(markerSets,function(x){ colnames(regmap_W)[colnames(regmap_W) %not_in% x] }) #remaining markers for each feature GO term - - regmap_nsets <- length(regmap_setsGF) - regmap_nsnps <- sapply(regmap_setsGF,length) - - saveRDS(regmap_setsGF,"regmap_setsGF.rds") - saveRDS(regmap_rsetsGF,"regmap_rsetsGF.rds") - saveRDS(regmap_nsets,"regmap_nsets.rds") - saveRDS(regmap_nsnps,"regmap_nsnps.rds") - - #Make Genomic Relationship Matrix for each feature and its remaining markers. - regmap_GF <- lapply(regmap_setsGF, function(x) {grm(W = regmap_W[, x])}) - saveRDS(regmap_GF,"regmap_GF.rds") - - regmap_rGF <- lapply(regmap_rsetsGF, function(x) {grm(W = regmap_W[, x])}) - saveRDS(regmap_rGF,"regmap_rGF.rds") -######################################################################################## diff --git a/code/make_go_features_all.R b/code/make_go_features_all.R new file mode 100644 index 0000000..e705738 --- /dev/null +++ b/code/make_go_features_all.R @@ -0,0 +1,135 @@ +message("start of get_go_features.R...") +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## +setwd("../data/priors/go/") +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +library('qgg') +library('dplyr') +library(GO.db) +library(org.At.tair.db) +######################################################################################## +MAC_matrix_with_header=readRDS(file="../../MAC_matrix_with_header.rds") +accessions=readRDS(file="../../accessions.rds") +all_nucl_genes_bed=readRDS(file="../../all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="../../ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="../../Pheno.rds") +geno_pheno=readRDS(file="../../geno_pheno.rds") +MAC_df=readRDS(file="../../MAC_df.rds") +pheno_df=readRDS(file="../../pheno_df.rds") +W=readRDS(file="../../W.rds") + +go_all_genes_number=readRDS(file="go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="go_all_markers.rds") #7432 +markerSets=readRDS(file="markerSets.rds") #7297 +setsGF=readRDS(file="setsGF.rds") #7297 +rsetsGF=readRDS(file="rsetsGF.rds") #7297 +nsets=readRDS(file="nsets.rds") #7297 +nsnps=readRDS(file="nsnps.rds") #7297 +GF=readRDS(file="GF.rds") #7297 +rGF=readRDS(file="rGF.rds") #7297 +########################################################################################### +message("Retrieving markers for each GO") +if(!exists("go_all_markers")) +{ + go2alltair <- as.list(org.At.tairGO2ALLTAIRS) + go_all_markers<-rep(list(list()),length(go2alltair)) + names(go_all_markers)<-(names(go2alltair)) + go_all_genes_number<-numeric(length=length(go_all_markers)) + + if(length(go2alltair) > 0) + { + for(i in c(1:length(go2alltair))) #Loop through all Go terms + { + message(paste("GO#",i)) + goid=names(go2alltair[i]) + tair_ids=go2alltair[i] + lst_of_genes=list() + for(j in 1:length(tair_ids[[1]])) #Loop through all Tair IDs in a GO term + { + lst_of_genes=c(lst_of_genes,unlist(unname(tair_ids[[1]][j]))) + } + unique_lst_of_tair_ids<- as.vector(unique(unlist(lst_of_genes))) + go_all_genes_number[i]<-length(unique_lst_of_tair_ids) + mlst<-list() + for(j in 1:length(unique_lst_of_tair_ids)) + { + #for each gene find its coordinates and then find all markers in it. then append these markers to corresponding GO feature + chromosome=as.numeric(all_nucl_genes_bed[all_nucl_genes_bed["name"]==unique_lst_of_tair_ids[j]][1]) + startpos=as.numeric(all_nucl_genes_bed[all_nucl_genes_bed["name"]==unique_lst_of_tair_ids[j]][2]) + endpos=as.numeric(all_nucl_genes_bed[all_nucl_genes_bed["name"]==unique_lst_of_tair_ids[j]][3]) + markers=ath_all_new_maf_ldpruned_map %>% filter(chr==chromosome & position >= startpos & position<=endpos) %>% mutate_if(is.factor, as.character) + #print(markers) + mlst<-c(mlst,as.list(markers["marker_name"])) + #print (mlst) + } + go_all_markers[i]<-list(unique(unlist(unname(mlst)))) + } + + go_all_markers_number<-numeric(length=length(go_all_markers)) + for(i in c(1:length(go_all_markers))) + { + go_all_markers_number[i]=length(go_all_markers[[i]]) + } + + } + saveRDS(go_all_genes_number,"go_all_genes_number.rds") + saveRDS(go_all_markers_number,"go_all_markers_number.rds") + saveRDS(go_all_markers,"go_all_markers.rds") +} +########################################################################################### +#Make Genomic Relationship Matrix for each feature and its remaining markers +message("Making Genomic Relationship Matrices...") +if(!exists("rGF")) +{ + markerSets<-list() + for(i in c(1:length(go_all_markers))) + { + if(length(go_all_markers[[i]])>0) + { + markerSets=c(markerSets,go_all_markers[i]) + } + } + setsGF <- lapply(markerSets,function(x){ x[x%in%colnames(W)] }) + rsetsGF <- lapply(markerSets,function(x){ colnames(W)[colnames(W) %not_in% x] }) #remaining markers for each feature GO term + + nsets <- length(setsGF) + nsnps <- sapply(setsGF,length) + + saveRDS(markerSets,"markerSets.rds") + saveRDS(setsGF,"setsGF.rds") + saveRDS(rsetsGF,"rsetsGF.rds") + saveRDS(nsets,"nsets.rds") + saveRDS(nsnps,"nsnps.rds") + + #Make Genomic Relationship Matrix for each feature and its remaining markers. + GF <- lapply(setsGF, function(x) {grm(W = W[, x])}) + saveRDS(GF,"GF.rds") + + rGF <- lapply(rsetsGF, function(x) {grm(W = W[, x])}) + saveRDS(rGF,"rGF.rds") + +} + +######################################################################################## +message("end of get_go_features.R...") -- GitLab