Skip to content
Snippets Groups Projects
Commit 42e3804c authored by Farooq, Muhammad's avatar Farooq, Muhammad
Browse files

uploaded

parent 78adac1c
Branches
No related tags found
No related merge requests found
......@@ -8,27 +8,21 @@ is.installed <- function(mypkg)
is.element(mypkg, installed.packages()[,1])
}
########################################################################################
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/coexpression")
setwd("../data/priors/coexpression")
library('dplyr')
library('qgg')
########################################################################################
MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
all_coexpression_clusters <-as.data.frame(read.table(file ="/mnt/LTR_userdata/faroo002/arabidopsis/GP/GBLUP_vs_GFBLUP/based_on_coexpression/ath_gene_clusters.txt", header=TRUE, sep="",strip.white=TRUE))
all_coexpression_clusters <-as.data.frame(read.table(file ="/mnt/LTR_userdata/faroo002/arabidopsis/GP/GBLUP_vs_GFBLUP/based_on_coexpression/ath_gene_clusters.txt", header=TRUE, sep="",strip.white=TRUE))
coex_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/coexpression/coex_markers_number.rds") #7432
coex_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/coexpression/coex_markers.rds") #7432
markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/coexpression/markerSets.rds")
MAC_matrix_with_header=readRDS(file="../../MAC_matrix_with_header.rds")
accessions=readRDS(file="../../accessions.rds")
all_nucl_genes_bed=readRDS(file="../../all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map=readRDS(file="../../ath_all_new_maf_ldpruned_map.rds")
MAC_df=readRDS(file="../../MAC_df.rds")
W=readRDS(file="../../W.rds")
all_coexpression_clusters <-as.data.frame(read.table(file ="ath_gene_clusters.txt", header=TRUE, sep="",strip.white=TRUE))
coex_markers_number=readRDS(file="coex_markers_number.rds") #7432
coex_markers=readRDS(file="coex_markers.rds") #7432
markerSets=readRDS(file="markerSets.rds")
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
regmap_W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/regmap_W.rds") #This is based on only regmap dataset using call_method_75_TAIR9_maf001_ldpruned
regmap_G=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/regmap_G.rds")
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
psii_filtered_coex_df_unique=read.table(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/psii_filtered_coex_df_unique.tbl",header=T,sep="\t")
psii_filtered_clusterids=as.character(psii_filtered_coex_df_unique$clusterid)
###########################################################################################
message("Retrieving markers for each feature")
if(!exists("coex_markers"))
......@@ -100,24 +94,3 @@ saveRDS(rGF,"./rGF.rds")
}
########################################################################################
#Making GRM from only Regmap dataset without flipping
message("Making Genomic Relationship Matrices...")
markerSets=markerSets[which(names(markerSets) %in% psii_filtered_clusterids)]
regmap_setsGF <- lapply(markerSets,function(x){ x[x %in% colnames(regmap_W)] })
regmap_rsetsGF <- lapply(markerSets,function(x){ colnames(regmap_W)[colnames(regmap_W) %not_in% x] }) #remaining markers for each feature GO term
regmap_nsets <- length(regmap_setsGF)
regmap_nsnps <- sapply(regmap_setsGF,length)
saveRDS(regmap_setsGF,"regmap_setsGF.rds")
saveRDS(regmap_rsetsGF,"regmap_rsetsGF.rds")
saveRDS(regmap_nsets,"regmap_nsets.rds")
saveRDS(regmap_nsnps,"regmap_nsnps.rds")
#Make Genomic Relationship Matrix for each feature and its remaining markers.
regmap_GF <- lapply(regmap_setsGF, function(x) {grm(W = regmap_W[, x])})
saveRDS(regmap_GF,"regmap_GF.rds")
regmap_rGF <- lapply(regmap_rsetsGF, function(x) {grm(W = regmap_W[, x])})
saveRDS(regmap_rGF,"regmap_rGF.rds")
########################################################################################
message("start of get_go_features.R...")
############################# FUNCTIONS ##############################################
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg)
{
is.element(mypkg, installed.packages()[,1])
}
########################################################################################
setwd("../data/priors/go/")
#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
library('qgg')
library('dplyr')
library(GO.db)
library(org.At.tair.db)
########################################################################################
MAC_matrix_with_header=readRDS(file="../../MAC_matrix_with_header.rds")
accessions=readRDS(file="../../accessions.rds")
all_nucl_genes_bed=readRDS(file="../../all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map=readRDS(file="../../ath_all_new_maf_ldpruned_map.rds")
Pheno=readRDS(file="../../Pheno.rds")
geno_pheno=readRDS(file="../../geno_pheno.rds")
MAC_df=readRDS(file="../../MAC_df.rds")
pheno_df=readRDS(file="../../pheno_df.rds")
W=readRDS(file="../../W.rds")
go_all_genes_number=readRDS(file="go_all_genes_number.rds") #7432
go_all_markers_number=readRDS(file="go_all_markers_number.rds") #7432
go_all_markers=readRDS(file="go_all_markers.rds") #7432
markerSets=readRDS(file="markerSets.rds") #7297
setsGF=readRDS(file="setsGF.rds") #7297
rsetsGF=readRDS(file="rsetsGF.rds") #7297
nsets=readRDS(file="nsets.rds") #7297
nsnps=readRDS(file="nsnps.rds") #7297
GF=readRDS(file="GF.rds") #7297
rGF=readRDS(file="rGF.rds") #7297
###########################################################################################
message("Retrieving markers for each GO")
if(!exists("go_all_markers"))
{
go2alltair <- as.list(org.At.tairGO2ALLTAIRS)
go_all_markers<-rep(list(list()),length(go2alltair))
names(go_all_markers)<-(names(go2alltair))
go_all_genes_number<-numeric(length=length(go_all_markers))
if(length(go2alltair) > 0)
{
for(i in c(1:length(go2alltair))) #Loop through all Go terms
{
message(paste("GO#",i))
goid=names(go2alltair[i])
tair_ids=go2alltair[i]
lst_of_genes=list()
for(j in 1:length(tair_ids[[1]])) #Loop through all Tair IDs in a GO term
{
lst_of_genes=c(lst_of_genes,unlist(unname(tair_ids[[1]][j])))
}
unique_lst_of_tair_ids<- as.vector(unique(unlist(lst_of_genes)))
go_all_genes_number[i]<-length(unique_lst_of_tair_ids)
mlst<-list()
for(j in 1:length(unique_lst_of_tair_ids))
{
#for each gene find its coordinates and then find all markers in it. then append these markers to corresponding GO feature
chromosome=as.numeric(all_nucl_genes_bed[all_nucl_genes_bed["name"]==unique_lst_of_tair_ids[j]][1])
startpos=as.numeric(all_nucl_genes_bed[all_nucl_genes_bed["name"]==unique_lst_of_tair_ids[j]][2])
endpos=as.numeric(all_nucl_genes_bed[all_nucl_genes_bed["name"]==unique_lst_of_tair_ids[j]][3])
markers=ath_all_new_maf_ldpruned_map %>% filter(chr==chromosome & position >= startpos & position<=endpos) %>% mutate_if(is.factor, as.character)
#print(markers)
mlst<-c(mlst,as.list(markers["marker_name"]))
#print (mlst)
}
go_all_markers[i]<-list(unique(unlist(unname(mlst))))
}
go_all_markers_number<-numeric(length=length(go_all_markers))
for(i in c(1:length(go_all_markers)))
{
go_all_markers_number[i]=length(go_all_markers[[i]])
}
}
saveRDS(go_all_genes_number,"go_all_genes_number.rds")
saveRDS(go_all_markers_number,"go_all_markers_number.rds")
saveRDS(go_all_markers,"go_all_markers.rds")
}
###########################################################################################
#Make Genomic Relationship Matrix for each feature and its remaining markers
message("Making Genomic Relationship Matrices...")
if(!exists("rGF"))
{
markerSets<-list()
for(i in c(1:length(go_all_markers)))
{
if(length(go_all_markers[[i]])>0)
{
markerSets=c(markerSets,go_all_markers[i])
}
}
setsGF <- lapply(markerSets,function(x){ x[x%in%colnames(W)] })
rsetsGF <- lapply(markerSets,function(x){ colnames(W)[colnames(W) %not_in% x] }) #remaining markers for each feature GO term
nsets <- length(setsGF)
nsnps <- sapply(setsGF,length)
saveRDS(markerSets,"markerSets.rds")
saveRDS(setsGF,"setsGF.rds")
saveRDS(rsetsGF,"rsetsGF.rds")
saveRDS(nsets,"nsets.rds")
saveRDS(nsnps,"nsnps.rds")
#Make Genomic Relationship Matrix for each feature and its remaining markers.
GF <- lapply(setsGF, function(x) {grm(W = W[, x])})
saveRDS(GF,"GF.rds")
rGF <- lapply(rsetsGF, function(x) {grm(W = W[, x])})
saveRDS(rGF,"rGF.rds")
}
########################################################################################
message("end of get_go_features.R...")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment