diff --git a/code/using_GO/pla/T14_1_1001_2000.R b/code/using_GO/pla/T14_1_1001_2000.R
new file mode 100644
index 0000000000000000000000000000000000000000..5e317bd1e8e751b9c66b3f9362ae00e1ab18722a
--- /dev/null
+++ b/code/using_GO/pla/T14_1_1001_2000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+ is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[1001:2000]
+rGF_filtered<-rGF_filtered[1001:2000]
+###########################################################################################
+# Repeated cross-validation of GBLUP and GO-term GFBLUP models for trait T14_1.
+# Every cycle reuses the validation sets stored in gblup_validate_all.rds, fits a
+# model with the relationship matrix G alone, and then fits one model per entry of
+# GF_filtered that pairs GF_filtered[k] with rGF_filtered[k]. Each model is fitted
+# twice with greml(): without `validate` (stored as "variances") and with
+# `validate` (stored as "prediction"). Results are saved as .rds files in the
+# working directory.
+cycles <- 10
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T14_1_gblup_variances_all <- rep(list(list()), cycles)
+T14_1_gblup_prediction_all <- rep(list(list()), cycles)
+T14_1_gfblup_variances_all <- rep(list(list()), cycles)
+T14_1_gfblup_prediction_all <- rep(list(list()), cycles)
+T14_1_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and design matrix do not change between cycles, so build them once.
+# NOTE(review): `y ~ 1*y` puts the response on the right-hand side as well; if an
+# intercept-only design is intended, `y ~ 1` states that directly - confirm.
+y <- pheno_df_pla$T14_1
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+# Fit one set of relationship matrices twice with greml().
+#   y, X     : phenotype vector and design matrix passed to greml()
+#   GRM      : list of relationship matrices passed to greml()
+#   validate : validation sets passed to greml() in the second fit
+#   label    : text identifying the model in the failure message
+# Returns list(variances = <fit without validate>, prediction = <fit with validate>).
+# If either fit throws, the error is reported with message() and BOTH elements are
+# empty lists, so a model never ends up with one result filled and the other
+# missing. The result is returned rather than assigned inside the handler because
+# assignments made inside a tryCatch handler function only change handler-local
+# copies and never reach the result lists.
+fit_greml_pair <- function(y, X, GRM, validate, label)
+{
+  tryCatch({
+    list(variances = greml(y = y, X = X, GRM = GRM, ncores = 1),
+         prediction = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1))
+  }, error = function(e) {
+    message("greml failed for ", label, ": ", conditionMessage(e))
+    list(variances = list(), prediction = list())
+  })
+}
+
+for (r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Folds are not re-sampled here: the sets saved for cycle r are reshaped into a
+  # matrix with round(nrow(pheno_df_pla)/n_folds) rows.
+  # NOTE(review): byrow=TRUE only reproduces the saved sets if they were stored
+  # row-wise - confirm against the script that wrote gblup_validate_all.rds.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T14_1...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gblup_fit <- fit_greml_pair(y, X, list(G=G), validate, "GBLUP")
+  T14_1_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
+  T14_1_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One slot per GO set, pre-filled with empty lists and named like GF_filtered.
+  gf_variances <- rep(list(list()), length(GF_filtered))
+  gf_prediction <- rep(list(list()), length(GF_filtered))
+  names(gf_variances) <- names(GF_filtered)
+  names(gf_prediction) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate, paste0("GO#", k))
+    gf_variances[[k]] <- gf_fit[["variances"]]
+    gf_prediction[[k]] <- gf_fit[["prediction"]]
+  }
+  T14_1_gfblup_variances_all[[r]] <- gf_variances
+  T14_1_gfblup_prediction_all[[r]] <- gf_prediction
+  T14_1_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T14_1_gblup_variances_all,"T14_1_gblup_variances_all_1001_2000.rds")
+saveRDS(T14_1_gblup_prediction_all,"T14_1_gblup_prediction_all_1001_2000.rds")
+saveRDS(T14_1_gfblup_variances_all,"T14_1_gfblup_variances_all_1001_2000.rds")
+saveRDS(T14_1_gfblup_prediction_all,"T14_1_gfblup_prediction_all_1001_2000.rds")
+saveRDS(T14_1_gfblup_validate_all,"T14_1_gfblup_validate_all_1001_2000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T14_1_1_1000.R b/code/using_GO/pla/T14_1_1_1000.R
new file mode 100644
index 0000000000000000000000000000000000000000..9516a0f7de3a414c67888e923b42aded00a359b5
--- /dev/null
+++ b/code/using_GO/pla/T14_1_1_1000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+ is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if
(!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[1:1000]
+rGF_filtered<-rGF_filtered[1:1000]
+###########################################################################################
+# Repeated cross-validation of GBLUP and GO-term GFBLUP models for trait T14_1.
+# Every cycle reuses the validation sets stored in gblup_validate_all.rds, fits a
+# model with the relationship matrix G alone, and then fits one model per entry of
+# GF_filtered that pairs GF_filtered[k] with rGF_filtered[k]. Each model is fitted
+# twice with greml(): without `validate` (stored as "variances") and with
+# `validate` (stored as "prediction"). Results are saved as .rds files in the
+# working directory.
+cycles <- 10
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T14_1_gblup_variances_all <- rep(list(list()), cycles)
+T14_1_gblup_prediction_all <- rep(list(list()), cycles)
+T14_1_gfblup_variances_all <- rep(list(list()), cycles)
+T14_1_gfblup_prediction_all <- rep(list(list()), cycles)
+T14_1_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and design matrix do not change between cycles, so build them once.
+# NOTE(review): `y ~ 1*y` puts the response on the right-hand side as well; if an
+# intercept-only design is intended, `y ~ 1` states that directly - confirm.
+y <- pheno_df_pla$T14_1
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+# Fit one set of relationship matrices twice with greml().
+#   y, X     : phenotype vector and design matrix passed to greml()
+#   GRM      : list of relationship matrices passed to greml()
+#   validate : validation sets passed to greml() in the second fit
+#   label    : text identifying the model in the failure message
+# Returns list(variances = <fit without validate>, prediction = <fit with validate>).
+# If either fit throws, the error is reported with message() and BOTH elements are
+# empty lists, so a model never ends up with one result filled and the other
+# missing. The result is returned rather than assigned inside the handler because
+# assignments made inside a tryCatch handler function only change handler-local
+# copies and never reach the result lists.
+fit_greml_pair <- function(y, X, GRM, validate, label)
+{
+  tryCatch({
+    list(variances = greml(y = y, X = X, GRM = GRM, ncores = 1),
+         prediction = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1))
+  }, error = function(e) {
+    message("greml failed for ", label, ": ", conditionMessage(e))
+    list(variances = list(), prediction = list())
+  })
+}
+
+for (r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Folds are not re-sampled here: the sets saved for cycle r are reshaped into a
+  # matrix with round(nrow(pheno_df_pla)/n_folds) rows.
+  # NOTE(review): byrow=TRUE only reproduces the saved sets if they were stored
+  # row-wise - confirm against the script that wrote gblup_validate_all.rds.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T14_1...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gblup_fit <- fit_greml_pair(y, X, list(G=G), validate, "GBLUP")
+  T14_1_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
+  T14_1_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One slot per GO set, pre-filled with empty lists and named like GF_filtered.
+  gf_variances <- rep(list(list()), length(GF_filtered))
+  gf_prediction <- rep(list(list()), length(GF_filtered))
+  names(gf_variances) <- names(GF_filtered)
+  names(gf_prediction) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate, paste0("GO#", k))
+    gf_variances[[k]] <- gf_fit[["variances"]]
+    gf_prediction[[k]] <- gf_fit[["prediction"]]
+  }
+  T14_1_gfblup_variances_all[[r]] <- gf_variances
+  T14_1_gfblup_prediction_all[[r]] <- gf_prediction
+  T14_1_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T14_1_gblup_variances_all,"T14_1_gblup_variances_all_1_1000.rds")
+saveRDS(T14_1_gblup_prediction_all,"T14_1_gblup_prediction_all_1_1000.rds")
+saveRDS(T14_1_gfblup_variances_all,"T14_1_gfblup_variances_all_1_1000.rds")
+saveRDS(T14_1_gfblup_prediction_all,"T14_1_gfblup_prediction_all_1_1000.rds")
+saveRDS(T14_1_gfblup_validate_all,"T14_1_gfblup_validate_all_1_1000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T14_1_2001_3000.R b/code/using_GO/pla/T14_1_2001_3000.R
new file mode 100644
index 0000000000000000000000000000000000000000..231392187c2aea5bb7de4fee173eb0886c3cd31d
--- /dev/null
+++ b/code/using_GO/pla/T14_1_2001_3000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+ is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[2001:3000]
+rGF_filtered<-rGF_filtered[2001:3000]
+###########################################################################################
+# Repeated cross-validation of GBLUP and GO-term GFBLUP models for trait T14_1.
+# Every cycle reuses the validation sets stored in gblup_validate_all.rds, fits a
+# model with the relationship matrix G alone, and then fits one model per entry of
+# GF_filtered that pairs GF_filtered[k] with rGF_filtered[k]. Each model is fitted
+# twice with greml(): without `validate` (stored as "variances") and with
+# `validate` (stored as "prediction"). Results are saved as .rds files in the
+# working directory.
+cycles <- 10
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T14_1_gblup_variances_all <- rep(list(list()), cycles)
+T14_1_gblup_prediction_all <- rep(list(list()), cycles)
+T14_1_gfblup_variances_all <- rep(list(list()), cycles)
+T14_1_gfblup_prediction_all <- rep(list(list()), cycles)
+T14_1_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and design matrix do not change between cycles, so build them once.
+# NOTE(review): `y ~ 1*y` puts the response on the right-hand side as well; if an
+# intercept-only design is intended, `y ~ 1` states that directly - confirm.
+y <- pheno_df_pla$T14_1
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+# Fit one set of relationship matrices twice with greml().
+#   y, X     : phenotype vector and design matrix passed to greml()
+#   GRM      : list of relationship matrices passed to greml()
+#   validate : validation sets passed to greml() in the second fit
+#   label    : text identifying the model in the failure message
+# Returns list(variances = <fit without validate>, prediction = <fit with validate>).
+# If either fit throws, the error is reported with message() and BOTH elements are
+# empty lists, so a model never ends up with one result filled and the other
+# missing. The result is returned rather than assigned inside the handler because
+# assignments made inside a tryCatch handler function only change handler-local
+# copies and never reach the result lists.
+fit_greml_pair <- function(y, X, GRM, validate, label)
+{
+  tryCatch({
+    list(variances = greml(y = y, X = X, GRM = GRM, ncores = 1),
+         prediction = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1))
+  }, error = function(e) {
+    message("greml failed for ", label, ": ", conditionMessage(e))
+    list(variances = list(), prediction = list())
+  })
+}
+
+for (r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Folds are not re-sampled here: the sets saved for cycle r are reshaped into a
+  # matrix with round(nrow(pheno_df_pla)/n_folds) rows.
+  # NOTE(review): byrow=TRUE only reproduces the saved sets if they were stored
+  # row-wise - confirm against the script that wrote gblup_validate_all.rds.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T14_1...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gblup_fit <- fit_greml_pair(y, X, list(G=G), validate, "GBLUP")
+  T14_1_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
+  T14_1_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One slot per GO set, pre-filled with empty lists and named like GF_filtered.
+  gf_variances <- rep(list(list()), length(GF_filtered))
+  gf_prediction <- rep(list(list()), length(GF_filtered))
+  names(gf_variances) <- names(GF_filtered)
+  names(gf_prediction) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate, paste0("GO#", k))
+    gf_variances[[k]] <- gf_fit[["variances"]]
+    gf_prediction[[k]] <- gf_fit[["prediction"]]
+  }
+  T14_1_gfblup_variances_all[[r]] <- gf_variances
+  T14_1_gfblup_prediction_all[[r]] <- gf_prediction
+  T14_1_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T14_1_gblup_variances_all,"T14_1_gblup_variances_all_2001_3000.rds")
+saveRDS(T14_1_gblup_prediction_all,"T14_1_gblup_prediction_all_2001_3000.rds")
+saveRDS(T14_1_gfblup_variances_all,"T14_1_gfblup_variances_all_2001_3000.rds")
+saveRDS(T14_1_gfblup_prediction_all,"T14_1_gfblup_prediction_all_2001_3000.rds")
+saveRDS(T14_1_gfblup_validate_all,"T14_1_gfblup_validate_all_2001_3000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T14_1_3001_4000.R b/code/using_GO/pla/T14_1_3001_4000.R
new file mode 100644
index 0000000000000000000000000000000000000000..8ac39a53f616ad79e6a466ff6e7790f51ddc12bd
--- /dev/null
+++ b/code/using_GO/pla/T14_1_3001_4000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+trim <-
function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+ is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[3001:4000]
+rGF_filtered<-rGF_filtered[3001:4000]
+###########################################################################################
+# Repeated cross-validation of GBLUP and GO-term GFBLUP models for trait T14_1.
+# Every cycle reuses the validation sets stored in gblup_validate_all.rds, fits a
+# model with the relationship matrix G alone, and then fits one model per entry of
+# GF_filtered that pairs GF_filtered[k] with rGF_filtered[k]. Each model is fitted
+# twice with greml(): without `validate` (stored as "variances") and with
+# `validate` (stored as "prediction"). Results are saved as .rds files in the
+# working directory.
+cycles <- 10
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T14_1_gblup_variances_all <- rep(list(list()), cycles)
+T14_1_gblup_prediction_all <- rep(list(list()), cycles)
+T14_1_gfblup_variances_all <- rep(list(list()), cycles)
+T14_1_gfblup_prediction_all <- rep(list(list()), cycles)
+T14_1_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and design matrix do not change between cycles, so build them once.
+# NOTE(review): `y ~ 1*y` puts the response on the right-hand side as well; if an
+# intercept-only design is intended, `y ~ 1` states that directly - confirm.
+y <- pheno_df_pla$T14_1
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+# Fit one set of relationship matrices twice with greml().
+#   y, X     : phenotype vector and design matrix passed to greml()
+#   GRM      : list of relationship matrices passed to greml()
+#   validate : validation sets passed to greml() in the second fit
+#   label    : text identifying the model in the failure message
+# Returns list(variances = <fit without validate>, prediction = <fit with validate>).
+# If either fit throws, the error is reported with message() and BOTH elements are
+# empty lists, so a model never ends up with one result filled and the other
+# missing. The result is returned rather than assigned inside the handler because
+# assignments made inside a tryCatch handler function only change handler-local
+# copies and never reach the result lists.
+fit_greml_pair <- function(y, X, GRM, validate, label)
+{
+  tryCatch({
+    list(variances = greml(y = y, X = X, GRM = GRM, ncores = 1),
+         prediction = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1))
+  }, error = function(e) {
+    message("greml failed for ", label, ": ", conditionMessage(e))
+    list(variances = list(), prediction = list())
+  })
+}
+
+for (r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Folds are not re-sampled here: the sets saved for cycle r are reshaped into a
+  # matrix with round(nrow(pheno_df_pla)/n_folds) rows.
+  # NOTE(review): byrow=TRUE only reproduces the saved sets if they were stored
+  # row-wise - confirm against the script that wrote gblup_validate_all.rds.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T14_1...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gblup_fit <- fit_greml_pair(y, X, list(G=G), validate, "GBLUP")
+  T14_1_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
+  T14_1_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One slot per GO set, pre-filled with empty lists and named like GF_filtered.
+  gf_variances <- rep(list(list()), length(GF_filtered))
+  gf_prediction <- rep(list(list()), length(GF_filtered))
+  names(gf_variances) <- names(GF_filtered)
+  names(gf_prediction) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate, paste0("GO#", k))
+    gf_variances[[k]] <- gf_fit[["variances"]]
+    gf_prediction[[k]] <- gf_fit[["prediction"]]
+  }
+  T14_1_gfblup_variances_all[[r]] <- gf_variances
+  T14_1_gfblup_prediction_all[[r]] <- gf_prediction
+  T14_1_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T14_1_gblup_variances_all,"T14_1_gblup_variances_all_3001_4000.rds")
+saveRDS(T14_1_gblup_prediction_all,"T14_1_gblup_prediction_all_3001_4000.rds")
+saveRDS(T14_1_gfblup_variances_all,"T14_1_gfblup_variances_all_3001_4000.rds")
+saveRDS(T14_1_gfblup_prediction_all,"T14_1_gfblup_prediction_all_3001_4000.rds")
+saveRDS(T14_1_gfblup_validate_all,"T14_1_gfblup_validate_all_3001_4000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T14_1_4001_5000.R b/code/using_GO/pla/T14_1_4001_5000.R
new file mode 100644
index 0000000000000000000000000000000000000000..3e47124948810f85cc972080f42f731cd9392e20
--- /dev/null
+++ b/code/using_GO/pla/T14_1_4001_5000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+ is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+#
devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[4001:5000]
+rGF_filtered<-rGF_filtered[4001:5000]
+###########################################################################################
+# Repeated cross-validation of GBLUP and GO-term GFBLUP models for trait T14_1.
+# Every cycle reuses the validation sets stored in gblup_validate_all.rds, fits a
+# model with the relationship matrix G alone, and then fits one model per entry of
+# GF_filtered that pairs GF_filtered[k] with rGF_filtered[k]. Each model is fitted
+# twice with greml(): without `validate` (stored as "variances") and with
+# `validate` (stored as "prediction"). Results are saved as .rds files in the
+# working directory.
+cycles <- 10
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T14_1_gblup_variances_all <- rep(list(list()), cycles)
+T14_1_gblup_prediction_all <- rep(list(list()), cycles)
+T14_1_gfblup_variances_all <- rep(list(list()), cycles)
+T14_1_gfblup_prediction_all <- rep(list(list()), cycles)
+T14_1_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and design matrix do not change between cycles, so build them once.
+# NOTE(review): `y ~ 1*y` puts the response on the right-hand side as well; if an
+# intercept-only design is intended, `y ~ 1` states that directly - confirm.
+y <- pheno_df_pla$T14_1
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+# Fit one set of relationship matrices twice with greml().
+#   y, X     : phenotype vector and design matrix passed to greml()
+#   GRM      : list of relationship matrices passed to greml()
+#   validate : validation sets passed to greml() in the second fit
+#   label    : text identifying the model in the failure message
+# Returns list(variances = <fit without validate>, prediction = <fit with validate>).
+# If either fit throws, the error is reported with message() and BOTH elements are
+# empty lists, so a model never ends up with one result filled and the other
+# missing. The result is returned rather than assigned inside the handler because
+# assignments made inside a tryCatch handler function only change handler-local
+# copies and never reach the result lists.
+fit_greml_pair <- function(y, X, GRM, validate, label)
+{
+  tryCatch({
+    list(variances = greml(y = y, X = X, GRM = GRM, ncores = 1),
+         prediction = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1))
+  }, error = function(e) {
+    message("greml failed for ", label, ": ", conditionMessage(e))
+    list(variances = list(), prediction = list())
+  })
+}
+
+for (r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Folds are not re-sampled here: the sets saved for cycle r are reshaped into a
+  # matrix with round(nrow(pheno_df_pla)/n_folds) rows.
+  # NOTE(review): byrow=TRUE only reproduces the saved sets if they were stored
+  # row-wise - confirm against the script that wrote gblup_validate_all.rds.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T14_1...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gblup_fit <- fit_greml_pair(y, X, list(G=G), validate, "GBLUP")
+  T14_1_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
+  T14_1_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One slot per GO set, pre-filled with empty lists and named like GF_filtered.
+  gf_variances <- rep(list(list()), length(GF_filtered))
+  gf_prediction <- rep(list(list()), length(GF_filtered))
+  names(gf_variances) <- names(GF_filtered)
+  names(gf_prediction) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate, paste0("GO#", k))
+    gf_variances[[k]] <- gf_fit[["variances"]]
+    gf_prediction[[k]] <- gf_fit[["prediction"]]
+  }
+  T14_1_gfblup_variances_all[[r]] <- gf_variances
+  T14_1_gfblup_prediction_all[[r]] <- gf_prediction
+  T14_1_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T14_1_gblup_variances_all,"T14_1_gblup_variances_all_4001_5000.rds")
+saveRDS(T14_1_gblup_prediction_all,"T14_1_gblup_prediction_all_4001_5000.rds")
+saveRDS(T14_1_gfblup_variances_all,"T14_1_gfblup_variances_all_4001_5000.rds")
+saveRDS(T14_1_gfblup_prediction_all,"T14_1_gfblup_prediction_all_4001_5000.rds")
+saveRDS(T14_1_gfblup_validate_all,"T14_1_gfblup_validate_all_4001_5000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T14_1_5001_6000.R b/code/using_GO/pla/T14_1_5001_6000.R
new file mode 100644
index 0000000000000000000000000000000000000000..eeed787694c5a9c1af6c6da1cd7964ad96fc9a80
--- /dev/null
+++ b/code/using_GO/pla/T14_1_5001_6000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+ is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers 
+######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_1_gblup_variances_all=rep(list(list()),cycles) +T14_1_gblup_prediction_all=rep(list(list()),cycles) +T14_1_gfblup_variances_all=rep(list(list()),cycles) +T14_1_gfblup_prediction_all=rep(list(list()),cycles) +T14_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_1...") + y=pheno_df_pla$T14_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_1_gblup_variances_all[[r]]<-var + T14_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
T14_1_gblup_variances_all[[r]]<-list() + T14_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_1_gfblup_variances_all[[r]]<-var + T14_1_gfblup_prediction_all[[r]]<-pred + T14_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_1_gblup_variances_all,"T14_1_gblup_variances_all_5001_6000.rds") +saveRDS(T14_1_gblup_prediction_all,"T14_1_gblup_prediction_all_5001_6000.rds") +saveRDS(T14_1_gfblup_variances_all,"T14_1_gfblup_variances_all_5001_6000.rds") +saveRDS(T14_1_gfblup_prediction_all,"T14_1_gfblup_prediction_all_5001_6000.rds") +saveRDS(T14_1_gfblup_validate_all,"T14_1_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_1_6001_7297.R b/code/using_GO/pla/T14_1_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..7697a444ca12d379b5f24b2793952f3ceeeaf190 --- /dev/null +++ b/code/using_GO/pla/T14_1_6001_7297.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- 
purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[6001:7297] +rGF_filtered<-rGF_filtered[6001:7297] +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_1_gblup_variances_all=rep(list(list()),cycles) 
+T14_1_gblup_prediction_all=rep(list(list()),cycles) +T14_1_gfblup_variances_all=rep(list(list()),cycles) +T14_1_gfblup_prediction_all=rep(list(list()),cycles) +T14_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_1...") + y=pheno_df_pla$T14_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_1_gblup_variances_all[[r]]<-var + T14_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_1_gblup_variances_all[[r]]<-list() + T14_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends 
+ } + T14_1_gfblup_variances_all[[r]]<-var + T14_1_gfblup_prediction_all[[r]]<-pred + T14_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_1_gblup_variances_all,"T14_1_gblup_variances_all_6001_7297.rds") +saveRDS(T14_1_gblup_prediction_all,"T14_1_gblup_prediction_all_6001_7297.rds") +saveRDS(T14_1_gfblup_variances_all,"T14_1_gfblup_variances_all_6001_7297.rds") +saveRDS(T14_1_gfblup_prediction_all,"T14_1_gfblup_prediction_all_6001_7297.rds") +saveRDS(T14_1_gfblup_validate_all,"T14_1_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_2_1001_2000.R b/code/using_GO/pla/T14_2_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..5a11ab6c4163a34cd81face106bb9f82ee38b449 --- /dev/null +++ b/code/using_GO/pla/T14_2_1001_2000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') 
+########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers 
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_2_gblup_variances_all=rep(list(list()),cycles) +T14_2_gblup_prediction_all=rep(list(list()),cycles) +T14_2_gfblup_variances_all=rep(list(list()),cycles) +T14_2_gfblup_prediction_all=rep(list(list()),cycles) +T14_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_2...") + y=pheno_df_pla$T14_2 + fm <- y ~ 1*y + X 
<- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_2_gblup_variances_all[[r]]<-var + T14_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_2_gblup_variances_all[[r]]<-list() + T14_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_2_gfblup_variances_all[[r]]<-var + T14_2_gfblup_prediction_all[[r]]<-pred + T14_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_2_gblup_variances_all,"T14_2_gblup_variances_all_1001_2000.rds") +saveRDS(T14_2_gblup_prediction_all,"T14_2_gblup_prediction_all_1001_2000.rds") +saveRDS(T14_2_gfblup_variances_all,"T14_2_gfblup_variances_all_1001_2000.rds") +saveRDS(T14_2_gfblup_prediction_all,"T14_2_gfblup_prediction_all_1001_2000.rds") +saveRDS(T14_2_gfblup_validate_all,"T14_2_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_2_1_1000.R 
b/code/using_GO/pla/T14_2_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..e4d1021615955b54b48e1140e0c63a4d2c49a3ee --- /dev/null +++ b/code/using_GO/pla/T14_2_1_1000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] 
+########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_2_gblup_variances_all=rep(list(list()),cycles) +T14_2_gblup_prediction_all=rep(list(list()),cycles) +T14_2_gfblup_variances_all=rep(list(list()),cycles) +T14_2_gfblup_prediction_all=rep(list(list()),cycles) +T14_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_2...") + y=pheno_df_pla$T14_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_2_gblup_variances_all[[r]]<-var + T14_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_2_gblup_variances_all[[r]]<-list() + T14_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { 
+ message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_2_gfblup_variances_all[[r]]<-var + T14_2_gfblup_prediction_all[[r]]<-pred + T14_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_2_gblup_variances_all,"T14_2_gblup_variances_all_1_1000.rds") +saveRDS(T14_2_gblup_prediction_all,"T14_2_gblup_prediction_all_1_1000.rds") +saveRDS(T14_2_gfblup_variances_all,"T14_2_gfblup_variances_all_1_1000.rds") +saveRDS(T14_2_gfblup_prediction_all,"T14_2_gfblup_prediction_all_1_1000.rds") +saveRDS(T14_2_gfblup_validate_all,"T14_2_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_2_2001_3000.R b/code/using_GO/pla/T14_2_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..f1414c60269b04d021616f371c2c5f6cf8acd2dd --- /dev/null +++ b/code/using_GO/pla/T14_2_2001_3000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if 
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers 
# Remaining GO inputs. Per the original notes these objects cover 7297 GO
# terms (GO terms with zero markers are excluded).
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter ----
# Keep GO terms with at least one marker, then take the slice of terms
# (2001-3000) that this script is responsible for.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][2001:3000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][2001:3000]

# ---- Cross-validation setup ----
cycles <- 10
# Validation sets saved by the GBLUP run; cycle r reuses element r so that
# GBLUP and GFBLUP are evaluated on the same folds.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) result slot per cycle. A slot stays an empty list
# when the corresponding model fit fails.
T14_2_gblup_variances_all <- rep(list(list()), cycles)
T14_2_gblup_prediction_all <- rep(list(list()), cycles)
T14_2_gfblup_variances_all <- rep(list(list()), cycles)
T14_2_gfblup_prediction_all <- rep(list(list()), cycles)
T14_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design matrix do not change between cycles.
# NOTE(review): `1*y` places the response on the right-hand side of the
# formula; confirm that model.matrix() yields the intended design.
y <- pheno_df_pla$T14_2
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are not re-sampled here: the index matrix is rebuilt from the
  # stored GBLUP validation sets, filled row-wise.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T14_2...")

  # ---- GBLUP ----
  # The braces are evaluated in this frame, so the assignments below take
  # effect here. The handler runs in its own function frame, so it only
  # reports the failure; the pre-initialised empty slots are the fallback.
  tryCatch({
    var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate,
                  ncores = 1)
    T14_2_gblup_variances_all[[r]] <- var
    T14_2_gblup_prediction_all[[r]] <- pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP ----
  # One two-component model per GO term: GF_filtered[k] together with
  # rGF_filtered[k].
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch({
      var[[k]] <- greml(y = y, X = X,
                        GRM = c(GF_filtered[k], rGF_filtered[k]),
                        ncores = 1)
      pred[[k]] <- greml(y = y, X = X,
                         GRM = c(GF_filtered[k], rGF_filtered[k]),
                         validate = validate, ncores = 1)
    }, error = function(e) {
      # Report instead of swallowing; a failed entry keeps its list().
      message("GFBLUP failed for ", names(GF_filtered)[k], " (cycle ", r,
              "): ", conditionMessage(e))
    })
  }
  T14_2_gfblup_variances_all[[r]] <- var
  T14_2_gfblup_prediction_all[[r]] <- pred
  T14_2_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T14_2_gblup_variances_all, "T14_2_gblup_variances_all_2001_3000.rds")
saveRDS(T14_2_gblup_prediction_all, "T14_2_gblup_prediction_all_2001_3000.rds")
# Write the GFBLUP results of the 2001-3000 slice to the working directory.
saveRDS(T14_2_gfblup_variances_all, file = "T14_2_gfblup_variances_all_2001_3000.rds")
saveRDS(T14_2_gfblup_prediction_all, file = "T14_2_gfblup_prediction_all_2001_3000.rds")
saveRDS(T14_2_gfblup_validate_all, file = "T14_2_gfblup_validate_all_2001_3000.rds")

# ------------------------------------------------------------------------------
# diff --git a/code/using_GO/pla/T14_2_3001_4000.R b/code/using_GO/pla/T14_2_3001_4000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..ae553eab07c0bef1cab09b35017edd504da3f868
# --- /dev/null
# +++ b/code/using_GO/pla/T14_2_3001_4000.R
# @@ -0,0 +1,130 @@
# ---- Functions ----
# Remove leading and trailing whitespace from x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated form of %in%.
`%not_in%` <- purrr::negate(`%in%`)

# TRUE when `mypkg` appears in the first column of installed.packages().
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time installation steps, kept commented out for reference:
# if (!is.installed("backports")) {
#   install.packages("backports")
# }
# if (!is.installed("devtools")) {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }

message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under the names they were saved with; the code
# below relies on `G` and `pheno_df_pla` coming from these files.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# Per the original notes: 7432 GO terms in the next three objects.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# Per the original notes: 7297 GO terms below (zero-marker terms excluded).
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter ----
# Keep GO terms with at least one marker, then take the slice of terms
# (3001-4000) that this script is responsible for.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][3001:4000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][3001:4000]

# ---- Cross-validation setup ----
cycles <- 10
# Validation sets saved by the GBLUP run; cycle r reuses element r so that
# GBLUP and GFBLUP are evaluated on the same folds.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) result slot per cycle. A slot stays an empty list
# when the corresponding model fit fails.
T14_2_gblup_variances_all <- rep(list(list()), cycles)
T14_2_gblup_prediction_all <- rep(list(list()), cycles)
T14_2_gfblup_variances_all <- rep(list(list()), cycles)
T14_2_gfblup_prediction_all <- rep(list(list()), cycles)
T14_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design matrix do not change between cycles.
# NOTE(review): `1*y` places the response on the right-hand side of the
# formula; confirm that model.matrix() yields the intended design.
y <- pheno_df_pla$T14_2
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are not re-sampled here: the index matrix is rebuilt from the
  # stored GBLUP validation sets, filled row-wise.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T14_2...")

  # ---- GBLUP ----
  # The braces are evaluated in this frame, so the assignments below take
  # effect here. The handler runs in its own function frame, so it only
  # reports the failure; the pre-initialised empty slots are the fallback.
  tryCatch({
    var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate,
                  ncores = 1)
    T14_2_gblup_variances_all[[r]] <- var
    T14_2_gblup_prediction_all[[r]] <- pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP ----
  # One two-component model per GO term: GF_filtered[k] together with
  # rGF_filtered[k].
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch({
      var[[k]] <- greml(y = y, X = X,
                        GRM = c(GF_filtered[k], rGF_filtered[k]),
                        ncores = 1)
      pred[[k]] <- greml(y = y, X = X,
                         GRM = c(GF_filtered[k], rGF_filtered[k]),
                         validate = validate, ncores = 1)
    }, error = function(e) {
      # Report instead of swallowing; a failed entry keeps its list().
      message("GFBLUP failed for ", names(GF_filtered)[k], " (cycle ", r,
              "): ", conditionMessage(e))
    })
  }
  T14_2_gfblup_variances_all[[r]] <- var
  T14_2_gfblup_prediction_all[[r]] <- pred
  T14_2_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T14_2_gblup_variances_all, "T14_2_gblup_variances_all_3001_4000.rds")
saveRDS(T14_2_gblup_prediction_all, "T14_2_gblup_prediction_all_3001_4000.rds")
saveRDS(T14_2_gfblup_variances_all, "T14_2_gfblup_variances_all_3001_4000.rds")
saveRDS(T14_2_gfblup_prediction_all, "T14_2_gfblup_prediction_all_3001_4000.rds")
saveRDS(T14_2_gfblup_validate_all, "T14_2_gfblup_validate_all_3001_4000.rds")

# ------------------------------------------------------------------------------
# diff --git a/code/using_GO/pla/T14_2_4001_5000.R b/code/using_GO/pla/T14_2_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..97de7ee28e7ecb75ef40973130b96e7e00cadd99
# --- /dev/null
# +++ b/code/using_GO/pla/T14_2_4001_5000.R
# @@ -0,0 +1,130 @@
# ---- Functions ----
# Remove leading and trailing whitespace from x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated form of %in%.
`%not_in%` <- purrr::negate(`%in%`)

# TRUE when `mypkg` appears in the first column of installed.packages().
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time installation steps, kept commented out for reference:
# if (!is.installed("backports")) {
#   install.packages("backports")
# }
# if (!is.installed("devtools")) {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }

message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO inputs. Per the original notes, the first three objects cover 7432 GO
# terms; the remaining ones cover 7297 (GO terms with zero markers excluded).
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter ----
# Keep GO terms with at least one marker, then take the slice of terms
# (4001-5000) that this script is responsible for.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][4001:5000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][4001:5000]

# Number of cross-validation cycles.
cycles <- 10
# Validation sets saved by the GBLUP run; cycle r reuses element r so that
# GBLUP and GFBLUP are evaluated on the same folds.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) result slot per cycle. A slot stays an empty list
# when the corresponding model fit fails.
T14_2_gblup_variances_all <- rep(list(list()), cycles)
T14_2_gblup_prediction_all <- rep(list(list()), cycles)
T14_2_gfblup_variances_all <- rep(list(list()), cycles)
T14_2_gfblup_prediction_all <- rep(list(list()), cycles)
T14_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design matrix do not change between cycles.
# NOTE(review): `1*y` places the response on the right-hand side of the
# formula; confirm that model.matrix() yields the intended design.
y <- pheno_df_pla$T14_2
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are not re-sampled here: the index matrix is rebuilt from the
  # stored GBLUP validation sets, filled row-wise.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T14_2...")

  # ---- GBLUP ----
  # The braces are evaluated in this frame, so the assignments below take
  # effect here. The handler runs in its own function frame, so it only
  # reports the failure; the pre-initialised empty slots are the fallback.
  tryCatch({
    var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate,
                  ncores = 1)
    T14_2_gblup_variances_all[[r]] <- var
    T14_2_gblup_prediction_all[[r]] <- pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP ----
  # One two-component model per GO term: GF_filtered[k] together with
  # rGF_filtered[k].
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch({
      var[[k]] <- greml(y = y, X = X,
                        GRM = c(GF_filtered[k], rGF_filtered[k]),
                        ncores = 1)
      pred[[k]] <- greml(y = y, X = X,
                         GRM = c(GF_filtered[k], rGF_filtered[k]),
                         validate = validate, ncores = 1)
    }, error = function(e) {
      # Report instead of swallowing; a failed entry keeps its list().
      message("GFBLUP failed for ", names(GF_filtered)[k], " (cycle ", r,
              "): ", conditionMessage(e))
    })
  }
  T14_2_gfblup_variances_all[[r]] <- var
  T14_2_gfblup_prediction_all[[r]] <- pred
  T14_2_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T14_2_gblup_variances_all, "T14_2_gblup_variances_all_4001_5000.rds")
saveRDS(T14_2_gblup_prediction_all, "T14_2_gblup_prediction_all_4001_5000.rds")
saveRDS(T14_2_gfblup_variances_all, "T14_2_gfblup_variances_all_4001_5000.rds")
saveRDS(T14_2_gfblup_prediction_all, "T14_2_gfblup_prediction_all_4001_5000.rds")
saveRDS(T14_2_gfblup_validate_all, "T14_2_gfblup_validate_all_4001_5000.rds")

# ------------------------------------------------------------------------------
# diff --git a/code/using_GO/pla/T14_2_5001_6000.R b/code/using_GO/pla/T14_2_5001_6000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..fc380291e002715f3b0001e626ed067694a4a68b
# --- /dev/null
# +++ b/code/using_GO/pla/T14_2_5001_6000.R
# @@ -0,0 +1,130 @@
# ---- Functions ----
# Remove leading and trailing whitespace from x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated form of %in%.
`%not_in%` <- purrr::negate(`%in%`)

# TRUE when `mypkg` appears in the first column of installed.packages().
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time installation steps, kept commented out for reference:
# if (!is.installed("backports")) {
#   install.packages("backports")
# }
# if (!is.installed("devtools")) {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }

message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under the names they were saved with; the code
# below relies on `G` and `pheno_df_pla` coming from these files.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# Per the original notes: 7432 GO terms in the next three objects.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# Per the original notes: 7297 GO terms below (zero-marker terms excluded).
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter ----
# Keep GO terms with at least one marker, then take the slice of terms
# (5001-6000) that this script is responsible for.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][5001:6000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][5001:6000]

# ---- Cross-validation setup ----
cycles <- 10
# Validation sets saved by the GBLUP run; cycle r reuses element r so that
# GBLUP and GFBLUP are evaluated on the same folds.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) result slot per cycle. A slot stays an empty list
# when the corresponding model fit fails.
T14_2_gblup_variances_all <- rep(list(list()), cycles)
T14_2_gblup_prediction_all <- rep(list(list()), cycles)
T14_2_gfblup_variances_all <- rep(list(list()), cycles)
T14_2_gfblup_prediction_all <- rep(list(list()), cycles)
T14_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design matrix do not change between cycles.
# NOTE(review): `1*y` places the response on the right-hand side of the
# formula; confirm that model.matrix() yields the intended design.
y <- pheno_df_pla$T14_2
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are not re-sampled here: the index matrix is rebuilt from the
  # stored GBLUP validation sets, filled row-wise.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T14_2...")

  # ---- GBLUP ----
  # The braces are evaluated in this frame, so the assignments below take
  # effect here. The handler runs in its own function frame, so it only
  # reports the failure; the pre-initialised empty slots are the fallback.
  tryCatch({
    var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate,
                  ncores = 1)
    T14_2_gblup_variances_all[[r]] <- var
    T14_2_gblup_prediction_all[[r]] <- pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP ----
  # One two-component model per GO term: GF_filtered[k] together with
  # rGF_filtered[k].
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch({
      var[[k]] <- greml(y = y, X = X,
                        GRM = c(GF_filtered[k], rGF_filtered[k]),
                        ncores = 1)
      pred[[k]] <- greml(y = y, X = X,
                         GRM = c(GF_filtered[k], rGF_filtered[k]),
                         validate = validate, ncores = 1)
    }, error = function(e) {
      # Report instead of swallowing; a failed entry keeps its list().
      message("GFBLUP failed for ", names(GF_filtered)[k], " (cycle ", r,
              "): ", conditionMessage(e))
    })
  }
  T14_2_gfblup_variances_all[[r]] <- var
  T14_2_gfblup_prediction_all[[r]] <- pred
  T14_2_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T14_2_gblup_variances_all, "T14_2_gblup_variances_all_5001_6000.rds")
saveRDS(T14_2_gblup_prediction_all, "T14_2_gblup_prediction_all_5001_6000.rds")
saveRDS(T14_2_gfblup_variances_all, "T14_2_gfblup_variances_all_5001_6000.rds")
saveRDS(T14_2_gfblup_prediction_all, "T14_2_gfblup_prediction_all_5001_6000.rds")
# Write the validation sets used for the 5001-6000 slice.
saveRDS(T14_2_gfblup_validate_all, file = "T14_2_gfblup_validate_all_5001_6000.rds")

# ------------------------------------------------------------------------------
# diff --git a/code/using_GO/pla/T14_2_6001_7297.R b/code/using_GO/pla/T14_2_6001_7297.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..1e2fb34f65dc4aee1ab10b454815a2fde0768ab3
# --- /dev/null
# +++ b/code/using_GO/pla/T14_2_6001_7297.R
# @@ -0,0 +1,130 @@
# ---- Functions ----
# Remove leading and trailing whitespace from x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated form of %in%.
`%not_in%` <- purrr::negate(`%in%`)

# TRUE when `mypkg` appears in the first column of installed.packages().
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time installation steps, kept commented out for reference:
# if (!is.installed("backports")) {
#   install.packages("backports")
# }
# if (!is.installed("devtools")) {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }

message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
# Remaining genotype / phenotype inputs.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under the names they were saved with.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO inputs. Per the original notes, the first three objects cover 7432 GO
# terms; the remaining ones cover 7297 (GO terms with zero markers excluded).
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
# ---- Filter ----
# Keep GO terms with at least one marker, then take the slice of terms
# (6001-7297) that this script is responsible for.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][6001:7297]
rGF_filtered <- rGF[names(go_all_markers_filtered)][6001:7297]

# ---- Cross-validation setup ----
cycles <- 10
# Validation sets saved by the GBLUP run; cycle r reuses element r so that
# GBLUP and GFBLUP are evaluated on the same folds.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) result slot per cycle. A slot stays an empty list
# when the corresponding model fit fails.
T14_2_gblup_variances_all <- rep(list(list()), cycles)
T14_2_gblup_prediction_all <- rep(list(list()), cycles)
T14_2_gfblup_variances_all <- rep(list(list()), cycles)
T14_2_gfblup_prediction_all <- rep(list(list()), cycles)
T14_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design matrix do not change between cycles.
# NOTE(review): `1*y` places the response on the right-hand side of the
# formula; confirm that model.matrix() yields the intended design.
y <- pheno_df_pla$T14_2
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are not re-sampled here: the index matrix is rebuilt from the
  # stored GBLUP validation sets, filled row-wise.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T14_2...")

  # ---- GBLUP ----
  # The braces are evaluated in this frame, so the assignments below take
  # effect here. The handler runs in its own function frame, so it only
  # reports the failure; the pre-initialised empty slots are the fallback.
  tryCatch({
    var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate,
                  ncores = 1)
    T14_2_gblup_variances_all[[r]] <- var
    T14_2_gblup_prediction_all[[r]] <- pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP ----
  # One two-component model per GO term: GF_filtered[k] together with
  # rGF_filtered[k].
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch({
      var[[k]] <- greml(y = y, X = X,
                        GRM = c(GF_filtered[k], rGF_filtered[k]),
                        ncores = 1)
      pred[[k]] <- greml(y = y, X = X,
                         GRM = c(GF_filtered[k], rGF_filtered[k]),
                         validate = validate, ncores = 1)
    }, error = function(e) {
      # Report instead of swallowing; a failed entry keeps its list().
      message("GFBLUP failed for ", names(GF_filtered)[k], " (cycle ", r,
              "): ", conditionMessage(e))
    })
  }
  T14_2_gfblup_variances_all[[r]] <- var
  T14_2_gfblup_prediction_all[[r]] <- pred
  T14_2_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T14_2_gblup_variances_all, "T14_2_gblup_variances_all_6001_7297.rds")
saveRDS(T14_2_gblup_prediction_all, "T14_2_gblup_prediction_all_6001_7297.rds")
saveRDS(T14_2_gfblup_variances_all, "T14_2_gfblup_variances_all_6001_7297.rds")
saveRDS(T14_2_gfblup_prediction_all, "T14_2_gfblup_prediction_all_6001_7297.rds")
saveRDS(T14_2_gfblup_validate_all, "T14_2_gfblup_validate_all_6001_7297.rds")

# ------------------------------------------------------------------------------
# diff --git a/code/using_GO/pla/T14_3_1001_2000.R b/code/using_GO/pla/T14_3_1001_2000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..d39f3ac47875915eece7cd25925cdd5099133fa9
# --- /dev/null
# +++ b/code/using_GO/pla/T14_3_1001_2000.R
# @@ -0,0 +1,130 @@
# ---- Functions ----
# Remove leading and trailing whitespace from x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated form of %in%.
`%not_in%` <- purrr::negate(`%in%`)

# TRUE when `mypkg` appears in the first column of installed.packages().
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time installation steps, kept commented out for reference:
# if (!is.installed("backports")) {
#   install.packages("backports")
# }
# if (!is.installed("devtools")) {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }

message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# Per the original notes: 7432 GO terms in this object.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
+# ---- GO-term level inputs ------------------------------------------------
+# The trailing numbers are the original author's notes on object sizes.
+go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+# Author's note for the objects below: 7297 entries, i.e. excluding those
+# GO terms with zero markers.
+markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
+setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
+rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
+nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
+nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
+GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
+rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
+
+# ---- Filter --------------------------------------------------------------
+# Keep only GO terms that have at least one marker, then take the slice of
+# terms (positions 1001 to 2000) handled by this script.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)][1001:2000]
+rGF_filtered <- rGF[names(go_all_markers_filtered)][1001:2000]
+
+# ---- Run configuration ---------------------------------------------------
+cycles <- 10
+# Read from the GBLUP 8fold directory; indexed once per cycle further down
+# to build the validation matrix.
+gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# One empty-list placeholder per cycle.
+T14_3_gblup_variances_all <- rep(list(list()), times = cycles)
+T14_3_gblup_prediction_all=rep(list(list()),cycles) +T14_3_gfblup_variances_all=rep(list(list()),cycles) +T14_3_gfblup_prediction_all=rep(list(list()),cycles) +T14_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_3...") + y=pheno_df_pla$T14_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_3_gblup_variances_all[[r]]<-var + T14_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_3_gblup_variances_all[[r]]<-list() + T14_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends 
+ } + T14_3_gfblup_variances_all[[r]]<-var + T14_3_gfblup_prediction_all[[r]]<-pred + T14_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_3_gblup_variances_all,"T14_3_gblup_variances_all_1001_2000.rds") +saveRDS(T14_3_gblup_prediction_all,"T14_3_gblup_prediction_all_1001_2000.rds") +saveRDS(T14_3_gfblup_variances_all,"T14_3_gfblup_variances_all_1001_2000.rds") +saveRDS(T14_3_gfblup_prediction_all,"T14_3_gfblup_prediction_all_1001_2000.rds") +saveRDS(T14_3_gfblup_validate_all,"T14_3_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_3_1_1000.R b/code/using_GO/pla/T14_3_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..e88f5c0b67e367173fc5a64d3edd1fa69b7a59f4 --- /dev/null +++ b/code/using_GO/pla/T14_3_1_1000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') 
+# ---- Load data -----------------------------------------------------------
+message("Loading data...")
+
+# Genotype / phenotype inputs. load() restores whatever objects each .Rdata
+# file contains; later code relies on `G` and `pheno_df_pla` existing, which
+# presumably come from G.Rdata and pheno_df_pla.Rdata (verify against the
+# files). The order of the calls is kept as in the original.
+MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+
+# GO-term level inputs. The trailing numbers are the original author's notes
+# on object sizes.
+go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
+go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+# Author's note for the objects below: 7297 entries, i.e. excluding those
+# GO terms with zero markers.
+markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
+setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
+rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
+nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_3_gblup_variances_all=rep(list(list()),cycles) +T14_3_gblup_prediction_all=rep(list(list()),cycles) +T14_3_gfblup_variances_all=rep(list(list()),cycles) +T14_3_gfblup_prediction_all=rep(list(list()),cycles) +T14_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_3...") + y=pheno_df_pla$T14_3 + fm <- y ~ 1*y + X <- 
model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_3_gblup_variances_all[[r]]<-var + T14_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_3_gblup_variances_all[[r]]<-list() + T14_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_3_gfblup_variances_all[[r]]<-var + T14_3_gfblup_prediction_all[[r]]<-pred + T14_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_3_gblup_variances_all,"T14_3_gblup_variances_all_1_1000.rds") +saveRDS(T14_3_gblup_prediction_all,"T14_3_gblup_prediction_all_1_1000.rds") +saveRDS(T14_3_gfblup_variances_all,"T14_3_gfblup_variances_all_1_1000.rds") +saveRDS(T14_3_gfblup_prediction_all,"T14_3_gfblup_prediction_all_1_1000.rds") +saveRDS(T14_3_gfblup_validate_all,"T14_3_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_3_2001_3000.R 
b/code/using_GO/pla/T14_3_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..151fff055f9d65278c5bfd22ca66286525d547dc --- /dev/null +++ b/code/using_GO/pla/T14_3_2001_3000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
+# Phenotype objects. load() restores whatever each .Rdata file contains;
+# `pheno_df_pla` is used later and presumably comes from pheno_df_pla.Rdata
+# (verify against the file). Call order is kept as in the original.
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+
+# ---- GO-term level inputs ------------------------------------------------
+# The trailing numbers are the original author's notes on object sizes.
+go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
+go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+# Author's note for the objects below: 7297 entries, i.e. excluding those
+# GO terms with zero markers.
+markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
+setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
+rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
+nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
+nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
+GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
+rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
+
+# ---- Filter --------------------------------------------------------------
+# Keep only GO terms that have at least one marker, then take the slice of
+# terms (positions 2001 to 3000) handled by this script.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)][2001:3000]
+rGF_filtered <- rGF[names(go_all_markers_filtered)][2001:3000]
+########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_3_gblup_variances_all=rep(list(list()),cycles) +T14_3_gblup_prediction_all=rep(list(list()),cycles) +T14_3_gfblup_variances_all=rep(list(list()),cycles) +T14_3_gfblup_prediction_all=rep(list(list()),cycles) +T14_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_3...") + y=pheno_df_pla$T14_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_3_gblup_variances_all[[r]]<-var + T14_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_3_gblup_variances_all[[r]]<-list() + T14_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { 
+ message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_3_gfblup_variances_all[[r]]<-var + T14_3_gfblup_prediction_all[[r]]<-pred + T14_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_3_gblup_variances_all,"T14_3_gblup_variances_all_2001_3000.rds") +saveRDS(T14_3_gblup_prediction_all,"T14_3_gblup_prediction_all_2001_3000.rds") +saveRDS(T14_3_gfblup_variances_all,"T14_3_gfblup_variances_all_2001_3000.rds") +saveRDS(T14_3_gfblup_prediction_all,"T14_3_gfblup_prediction_all_2001_3000.rds") +saveRDS(T14_3_gfblup_validate_all,"T14_3_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_3_3001_4000.R b/code/using_GO/pla/T14_3_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..8cc719c3562f96124f0eef40b88a56bcaa9bb8b2 --- /dev/null +++ b/code/using_GO/pla/T14_3_3001_4000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if 
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers 
+# ---- Remaining GO-term level inputs --------------------------------------
+# Author's note for these objects: 7297 entries, i.e. excluding those GO
+# terms with zero markers.
+setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
+rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
+nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
+nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
+GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
+rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
+
+# ---- Filter --------------------------------------------------------------
+# Keep only GO terms that have at least one marker, then take the slice of
+# terms (positions 3001 to 4000) handled by this script.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+GF_filtered <- GF_filtered[3001:4000]
+rGF_filtered <- rGF_filtered[3001:4000]
+
+# ---- Run configuration ---------------------------------------------------
+cycles <- 10
+# Validation sets are re-used from the GBLUP run (one entry per cycle), so
+# no new random folds are drawn in this script.
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# One empty-list placeholder per cycle; a slot stays `list()` when the
+# corresponding fit fails.
+T14_3_gblup_variances_all <- rep(list(list()), cycles)
+T14_3_gblup_prediction_all <- rep(list(list()), cycles)
+T14_3_gfblup_variances_all <- rep(list(list()), cycles)
+T14_3_gfblup_prediction_all <- rep(list(list()), cycles)
+T14_3_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Run one greml() fit and report a failure instead of hiding it.
+#
+# label: short text identifying the fit in the log message.
+# ...:   arguments forwarded unchanged to greml().
+# Returns the greml() result, or NULL when greml() signalled an error.
+try_greml <- function(label, ...) {
+  tryCatch(
+    greml(...),
+    error = function(e) {
+      # message() rather than warning(): top-level warnings are deferred and
+      # truncated, while messages reach the log immediately.
+      message(label, " failed: ", conditionMessage(e))
+      NULL
+    }
+  )
+}
+
+# Phenotype and fixed-effect design do not depend on the cycle, so they are
+# built once. The original formula `y ~ 1*y` put the response on the
+# right-hand side; model.matrix() drops such a term with a warning, leaving
+# the intercept-only design that `y ~ 1` requests directly.
+y <- pheno_df_pla$T14_3
+X <- model.matrix(y ~ 1)
+# NOTE: as in the original, this re-uses the name `n` (set in the filter
+# section above) for the number of phenotype values.
+n <- length(y)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Reshape this cycle's stored validation indices into a matrix with
+  # round(nrow(pheno_df_pla) / n_folds) rows, filled row by row.
+  validate <- matrix(
+    unlist(gblup_validate[[r]]),
+    round(nrow(pheno_df_pla) / n_folds),
+    byrow = TRUE
+  )
+  message("T14_3...")
+
+  # ---- GBLUP -------------------------------------------------------------
+  # Both fits must succeed before either is stored (same all-or-nothing
+  # behaviour as the original block).
+  gblup_var <- try_greml(
+    paste0("GBLUP variance fit, cycle ", r),
+    y = y, X = X, GRM = list(G = G), ncores = 1
+  )
+  gblup_pred <- NULL
+  if (!is.null(gblup_var)) {
+    gblup_pred <- try_greml(
+      paste0("GBLUP prediction fit, cycle ", r),
+      y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1
+    )
+  }
+  if (!is.null(gblup_var) && !is.null(gblup_pred)) {
+    T14_3_gblup_variances_all[[r]] <- gblup_var
+    T14_3_gblup_prediction_all[[r]] <- gblup_pred
+  }
+
+  # ---- GFBLUP ------------------------------------------------------------
+  n_terms <- length(GF_filtered)
+  gfblup_var <- rep(list(list()), n_terms)
+  gfblup_pred <- rep(list(list()), n_terms)
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_len(n_terms)) {
+    message(paste("GO#", k, "..."))
+    # Two relationship matrices per model: the k-th entry of GF_filtered and
+    # the k-th entry of rGF_filtered.
+    grm <- c(GF_filtered[k], rGF_filtered[k])
+    fit <- try_greml(
+      paste0("GFBLUP variance fit, cycle ", r, ", GO# ", k),
+      y = y, X = X, GRM = grm, ncores = 1
+    )
+    if (is.null(fit)) {
+      # As in the original, the prediction fit is skipped when the variance
+      # fit fails; both slots keep their empty-list placeholder.
+      next
+    }
+    gfblup_var[[k]] <- fit
+    fit <- try_greml(
+      paste0("GFBLUP prediction fit, cycle ", r, ", GO# ", k),
+      y = y, X = X, GRM = grm, validate = validate, ncores = 1
+    )
+    if (!is.null(fit)) {
+      gfblup_pred[[k]] <- fit
+    }
+  }
+  T14_3_gfblup_variances_all[[r]] <- gfblup_var
+  T14_3_gfblup_prediction_all[[r]] <- gfblup_pred
+  T14_3_gfblup_validate_all[[r]] <- list(validate)
+}
+saveRDS(T14_3_gblup_variances_all, "T14_3_gblup_variances_all_3001_4000.rds")
+saveRDS(T14_3_gblup_prediction_all, "T14_3_gblup_prediction_all_3001_4000.rds")
+saveRDS(T14_3_gfblup_variances_all,"T14_3_gfblup_variances_all_3001_4000.rds") +saveRDS(T14_3_gfblup_prediction_all,"T14_3_gfblup_prediction_all_3001_4000.rds") +saveRDS(T14_3_gfblup_validate_all,"T14_3_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_3_4001_5000.R b/code/using_GO/pla/T14_3_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..7100d155e486e572b13af3d32b533dabbab51d91 --- /dev/null +++ b/code/using_GO/pla/T14_3_4001_5000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") 
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") 
#7297 excluding those go terms with zero markers

# ---- Filter: keep GO terms with at least one marker, then take this chunk ----
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]

# ---- Cross-validation setup ----
cycles <- 10
# Validation sets are read from the plain GBLUP run rather than re-sampled here.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when its fit fails.
T14_3_gblup_variances_all <- rep(list(list()), cycles)
T14_3_gblup_prediction_all <- rep(list(list()), cycles)
T14_3_gfblup_variances_all <- rep(list(list()), cycles)
T14_3_gfblup_prediction_all <- rep(list(list()), cycles)
T14_3_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T14_3
# NOTE(review): `1*y` puts the response on the right-hand side as well;
# presumably `y ~ 1` was intended - confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): assumes round(nrow(pheno_df_pla) / n_folds) matches the
  # stored fold size - confirm against the GBLUP script.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T14_3...")

  # ---- GBLUP: one genomic relationship matrix ----
  # The assignments run in the loop's own environment. The handler only
  # reports: assigning inside it would modify a copy local to the handler,
  # so the pre-allocated empty slots already are the failure value.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    T14_3_gblup_variances_all[[r]] <- gblup_var
    T14_3_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP: one two-component model per GO term ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
    }, error = function(e) {
      # A slot that was not reached keeps its empty list.
      message("GFBLUP failed for set ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  T14_3_gfblup_variances_all[[r]] <- gf_var
  T14_3_gfblup_prediction_all[[r]] <- gf_pred
  T14_3_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T14_3_gblup_variances_all, "T14_3_gblup_variances_all_4001_5000.rds")
saveRDS(T14_3_gblup_prediction_all, "T14_3_gblup_prediction_all_4001_5000.rds")
saveRDS(T14_3_gfblup_variances_all, "T14_3_gfblup_variances_all_4001_5000.rds")
saveRDS(T14_3_gfblup_prediction_all, "T14_3_gfblup_prediction_all_4001_5000.rds")
saveRDS(T14_3_gfblup_validate_all, "T14_3_gfblup_validate_all_4001_5000.rds")

# ---- patch header (kept verbatim as a comment) ----
# diff --git a/code/using_GO/pla/T14_3_5001_6000.R b/code/using_GO/pla/T14_3_5001_6000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..26e941c439b12d9b187a4e46cd70301f335c60b3
# --- /dev/null
# +++ b/code/using_GO/pla/T14_3_5001_6000.R
# @@ -0,0 +1,130 @@

# ---- Functions ----
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

# Relative saveRDS() targets end up in this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term objects stored under priors/go.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers

# ---- Filter ----
# Keep the GO terms that have at least one marker, then take entries
# 5001-6000 of the matching relationship-matrix lists.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][5001:6000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][5001:6000]

# ---- Cross-validation setup ----
cycles <- 10 #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
# Validation sets are read from the plain GBLUP run rather than re-sampled here.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when its fit fails.
T14_3_gblup_variances_all <- rep(list(list()), cycles)
T14_3_gblup_prediction_all <- rep(list(list()), cycles)
T14_3_gfblup_variances_all <- rep(list(list()), cycles)
T14_3_gfblup_prediction_all <- rep(list(list()), cycles)
T14_3_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T14_3
# NOTE(review): `1*y` puts the response on the right-hand side as well;
# presumably `y ~ 1` was intended - confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): assumes round(nrow(pheno_df_pla) / n_folds) matches the
  # stored fold size - confirm against the GBLUP script.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T14_3...")

  # ---- GBLUP: one genomic relationship matrix ----
  # The assignments run in the loop's own environment. The handler only
  # reports: assigning inside it would modify a copy local to the handler,
  # so the pre-allocated empty slots already are the failure value.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    T14_3_gblup_variances_all[[r]] <- gblup_var
    T14_3_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP: one two-component model per GO term ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
    }, error = function(e) {
      # A slot that was not reached keeps its empty list.
      message("GFBLUP failed for set ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  T14_3_gfblup_variances_all[[r]] <- gf_var
  T14_3_gfblup_prediction_all[[r]] <- gf_pred
  T14_3_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T14_3_gblup_variances_all, "T14_3_gblup_variances_all_5001_6000.rds")
saveRDS(T14_3_gblup_prediction_all, "T14_3_gblup_prediction_all_5001_6000.rds")
saveRDS(T14_3_gfblup_variances_all, "T14_3_gfblup_variances_all_5001_6000.rds")
saveRDS(T14_3_gfblup_prediction_all, "T14_3_gfblup_prediction_all_5001_6000.rds")
saveRDS(T14_3_gfblup_validate_all, "T14_3_gfblup_validate_all_5001_6000.rds")

# ---- patch header (kept verbatim as a comment) ----
# diff --git a/code/using_GO/pla/T14_3_6001_7297.R b/code/using_GO/pla/T14_3_6001_7297.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..9130398b04e5a297e0e06e3adde115a31dea735f
# --- /dev/null
# +++ b/code/using_GO/pla/T14_3_6001_7297.R
# @@ -0,0 +1,130 @@

# ---- Functions ----
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

# Relative saveRDS() targets end up in this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
#7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers

# ---- Filter: keep GO terms with at least one marker, then take this chunk ----
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
# NOTE(review): the upper bound 7297 is hard-coded; confirm it still equals
# length(GF_filtered) whenever the GO inputs are regenerated.
GF_filtered <- GF_filtered[6001:7297]
rGF_filtered <- rGF_filtered[6001:7297]

# ---- Cross-validation setup ----
cycles <- 10
# Validation sets are read from the plain GBLUP run rather than re-sampled here.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when its fit fails.
T14_3_gblup_variances_all <- rep(list(list()), cycles)
T14_3_gblup_prediction_all <- rep(list(list()), cycles)
T14_3_gfblup_variances_all <- rep(list(list()), cycles)
T14_3_gfblup_prediction_all <- rep(list(list()), cycles)
T14_3_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T14_3
# NOTE(review): `1*y` puts the response on the right-hand side as well;
# presumably `y ~ 1` was intended - confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): assumes round(nrow(pheno_df_pla) / n_folds) matches the
  # stored fold size - confirm against the GBLUP script.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T14_3...")

  # ---- GBLUP: one genomic relationship matrix ----
  # The assignments run in the loop's own environment. The handler only
  # reports: assigning inside it would modify a copy local to the handler,
  # so the pre-allocated empty slots already are the failure value.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    T14_3_gblup_variances_all[[r]] <- gblup_var
    T14_3_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP: one two-component model per GO term ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
    }, error = function(e) {
      # A slot that was not reached keeps its empty list.
      message("GFBLUP failed for set ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  T14_3_gfblup_variances_all[[r]] <- gf_var
  T14_3_gfblup_prediction_all[[r]] <- gf_pred
  T14_3_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T14_3_gblup_variances_all, "T14_3_gblup_variances_all_6001_7297.rds")
saveRDS(T14_3_gblup_prediction_all, "T14_3_gblup_prediction_all_6001_7297.rds")
saveRDS(T14_3_gfblup_variances_all, "T14_3_gfblup_variances_all_6001_7297.rds")
saveRDS(T14_3_gfblup_prediction_all, "T14_3_gfblup_prediction_all_6001_7297.rds")
# Last output of the 6001-7297 run.
saveRDS(T14_3_gfblup_validate_all, file = "T14_3_gfblup_validate_all_6001_7297.rds")

# ---- patch header (kept verbatim as a comment) ----
# diff --git a/code/using_GO/pla/T14_4_1001_2000.R b/code/using_GO/pla/T14_4_1001_2000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..3e4419da67531bf122954302ce2ea980277503e6
# --- /dev/null
# +++ b/code/using_GO/pla/T14_4_1001_2000.R
# @@ -0,0 +1,130 @@

# ---- Functions ----
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)

# TRUE when `mypkg` is listed in the first column of installed.packages().
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}

# ---- Working directory (relative saveRDS() targets end up here) ----
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Disabled one-time installation steps:
#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
# Objects stored under priors/data. load() restores objects under the names
# they were saved with; readRDS() results are bound explicitly.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO-term objects stored under priors/go.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
# ---- Filter: keep GO terms with at least one marker, then take this chunk ----
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1001:2000]
rGF_filtered <- rGF_filtered[1001:2000]

# ---- Cross-validation setup ----
cycles <- 10
# Validation sets are read from the plain GBLUP run rather than re-sampled here.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when its fit fails.
T14_4_gblup_variances_all <- rep(list(list()), cycles)
T14_4_gblup_prediction_all <- rep(list(list()), cycles)
T14_4_gfblup_variances_all <- rep(list(list()), cycles)
T14_4_gfblup_prediction_all <- rep(list(list()), cycles)
T14_4_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T14_4
# NOTE(review): `1*y` puts the response on the right-hand side as well;
# presumably `y ~ 1` was intended - confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): assumes round(nrow(pheno_df_pla) / n_folds) matches the
  # stored fold size - confirm against the GBLUP script.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T14_4...")

  # ---- GBLUP: one genomic relationship matrix ----
  # The assignments run in the loop's own environment. The handler only
  # reports: assigning inside it would modify a copy local to the handler,
  # so the pre-allocated empty slots already are the failure value.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    T14_4_gblup_variances_all[[r]] <- gblup_var
    T14_4_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP: one two-component model per GO term ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
    }, error = function(e) {
      # A slot that was not reached keeps its empty list.
      message("GFBLUP failed for set ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  T14_4_gfblup_variances_all[[r]] <- gf_var
  T14_4_gfblup_prediction_all[[r]] <- gf_pred
  T14_4_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T14_4_gblup_variances_all, "T14_4_gblup_variances_all_1001_2000.rds")
saveRDS(T14_4_gblup_prediction_all, "T14_4_gblup_prediction_all_1001_2000.rds")
saveRDS(T14_4_gfblup_variances_all, "T14_4_gfblup_variances_all_1001_2000.rds")
saveRDS(T14_4_gfblup_prediction_all, "T14_4_gfblup_prediction_all_1001_2000.rds")
saveRDS(T14_4_gfblup_validate_all, "T14_4_gfblup_validate_all_1001_2000.rds")

# ---- patch header (kept verbatim as a comment) ----
# diff --git a/code/using_GO/pla/T14_4_1_1000.R b/code/using_GO/pla/T14_4_1_1000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..fb82682437af2202908cadc561225beff16b6073
# --- /dev/null
# +++ b/code/using_GO/pla/T14_4_1_1000.R
# @@ -0,0 +1,130 @@

# ---- Functions ----
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

# Relative saveRDS() targets end up in this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
# GO-term objects stored under priors/go.
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers

# ---- Filter ----
# Keep the GO terms that have at least one marker, then take entries 1-1000
# of the matching relationship-matrix lists.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][1:1000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][1:1000]

# ---- Cross-validation setup ----
cycles <- 10 #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
# Validation sets saved by the plain GBLUP run.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One empty-list slot per cycle.
T14_4_gblup_variances_all <- rep(list(list()), cycles)
# One slot per cycle; a slot stays an empty list when its fit fails.
T14_4_gblup_prediction_all <- rep(list(list()), cycles)
T14_4_gfblup_variances_all <- rep(list(list()), cycles)
T14_4_gfblup_prediction_all <- rep(list(list()), cycles)
T14_4_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T14_4
# NOTE(review): `1*y` puts the response on the right-hand side as well;
# presumably `y ~ 1` was intended - confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Validation sets come from the stored GBLUP run, not from a fresh sample.
  # NOTE(review): assumes round(nrow(pheno_df_pla) / n_folds) matches the
  # stored fold size - confirm against the GBLUP script.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T14_4...")

  # ---- GBLUP: one genomic relationship matrix ----
  # The assignments run in the loop's own environment. The handler only
  # reports: assigning inside it would modify a copy local to the handler,
  # so the pre-allocated empty slots already are the failure value.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    T14_4_gblup_variances_all[[r]] <- gblup_var
    T14_4_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP: one two-component model per GO term ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
    }, error = function(e) {
      # A slot that was not reached keeps its empty list.
      message("GFBLUP failed for set ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  T14_4_gfblup_variances_all[[r]] <- gf_var
  T14_4_gfblup_prediction_all[[r]] <- gf_pred
  T14_4_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T14_4_gblup_variances_all, "T14_4_gblup_variances_all_1_1000.rds")
saveRDS(T14_4_gblup_prediction_all, "T14_4_gblup_prediction_all_1_1000.rds")
saveRDS(T14_4_gfblup_variances_all, "T14_4_gfblup_variances_all_1_1000.rds")
saveRDS(T14_4_gfblup_prediction_all, "T14_4_gfblup_prediction_all_1_1000.rds")
saveRDS(T14_4_gfblup_validate_all, "T14_4_gfblup_validate_all_1_1000.rds")

# ---- patch header (kept verbatim as a comment) ----
# diff --git a/code/using_GO/pla/T14_4_2001_3000.R b/code/using_GO/pla/T14_4_2001_3000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..6bd2e0b6bb7e2196c2ed4b0391970764879aab02
# --- /dev/null
# +++ b/code/using_GO/pla/T14_4_2001_3000.R
# @@ -0,0 +1,130 @@

# ---- Functions ----
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

# Relative saveRDS() targets end up in this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
+########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers 
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_4_gblup_variances_all=rep(list(list()),cycles) +T14_4_gblup_prediction_all=rep(list(list()),cycles) +T14_4_gfblup_variances_all=rep(list(list()),cycles) +T14_4_gfblup_prediction_all=rep(list(list()),cycles) +T14_4_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_4...") + y=pheno_df_pla$T14_4 + fm <- y ~ 1*y + X 
<- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_4_gblup_variances_all[[r]]<-var + T14_4_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_4_gblup_variances_all[[r]]<-list() + T14_4_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_4_gfblup_variances_all[[r]]<-var + T14_4_gfblup_prediction_all[[r]]<-pred + T14_4_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_4_gblup_variances_all,"T14_4_gblup_variances_all_2001_3000.rds") +saveRDS(T14_4_gblup_prediction_all,"T14_4_gblup_prediction_all_2001_3000.rds") +saveRDS(T14_4_gfblup_variances_all,"T14_4_gfblup_variances_all_2001_3000.rds") +saveRDS(T14_4_gfblup_prediction_all,"T14_4_gfblup_prediction_all_2001_3000.rds") +saveRDS(T14_4_gfblup_validate_all,"T14_4_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_4_3001_4000.R 
# b/code/using_GO/pla/T14_4_3001_4000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..7ab760583879dca70dccb70ef4c5e5125cc9d33f
# --- /dev/null
# +++ b/code/using_GO/pla/T14_4_3001_4000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg)
{
  is.element(mypkg, installed.packages()[,1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}

# ---------------------------------------------------------------------------
# GBLUP / GFBLUP runs for phenotype column T14_4 on GO-term GRMs 3001-4000.
# In each of `cycles` cycles the validation sets stored in
# gblup_validate_all.rds are reused; greml() is fitted with the GRM `G`
# (GBLUP) and once per GO term with the pair GF[k] / rGF[k] (GFBLUP).
# All fits are written to *_3001_4000.rds in the working directory.
# NOTE(review): `G` and `pheno_df_pla` are not created in this script;
# presumably they come from the load()ed .Rdata files - confirm.
# ---------------------------------------------------------------------------
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
########################################################################################
message("Loading data...")
# Input directories; file.path() rebuilds exactly the paths used before.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
markerSets <- readRDS(file.path(go_dir, "markerSets.rds")) #7297 excluding those go terms with zero markers
setsGF <- readRDS(file.path(go_dir, "setsGF.rds")) #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds")) #7297 excluding those go terms with zero markers
nsets <- readRDS(file.path(go_dir, "nsets.rds")) #7297 excluding those go terms with zero markers
nsnps <- readRDS(file.path(go_dir, "nsnps.rds")) #7297 excluding those go terms with zero markers
GF <- readRDS(file.path(go_dir, "GF.rds")) #7297 excluding those go terms with zero markers
rGF <- readRDS(file.path(go_dir, "rGF.rds")) #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Slice of GO terms handled by this script; also used to name the output files.
go_first <- 3001L
go_last <- 4000L
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][go_first:go_last]
rGF_filtered <- rGF[names(go_all_markers_filtered)][go_first:go_last]
########################################################################################
cycles <- 10
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T14_4_gblup_variances_all <- rep(list(list()), cycles)
T14_4_gblup_prediction_all <- rep(list(list()), cycles)
T14_4_gfblup_variances_all <- rep(list(list()), cycles)
T14_4_gfblup_prediction_all <- rep(list(list()), cycles)
T14_4_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T14_4
# Formula kept exactly as written.
# NOTE(review): the response also appears on the right-hand side - confirm
# that model.matrix() returns the intended design for it.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#",r,"..."))
  # Validation sets of cycle r, reshaped to round(n / n_folds) rows.
  # NOTE(review): byrow=TRUE only reproduces the stored folds if they were
  # flattened row-wise - confirm against the script that wrote the file.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
  message ("T14_4...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed before either is stored (same as before); the
  # handler now reports the error instead of assigning to local copies.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G=G), ncores=1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1)
    T14_4_gblup_variances_all[[r]] <- gblup_var
    T14_4_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#",k,"..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # A failed fit leaves the pre-filled empty list in place for this GO term.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores=1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores=1)
    }, error = function(e) {
      message("GFBLUP failed in cycle ", r, ", GO# ", k, ": ", conditionMessage(e))
    })
  }
  T14_4_gfblup_variances_all[[r]] <- gf_var
  T14_4_gfblup_prediction_all[[r]] <- gf_pred
  T14_4_gfblup_validate_all[[r]] <- list(validate)
}

# Same file names as before, e.g. "T14_4_gblup_variances_all_3001_4000.rds".
out_suffix <- sprintf("_%d_%d.rds", go_first, go_last)
saveRDS(T14_4_gblup_variances_all, paste0("T14_4_gblup_variances_all", out_suffix))
saveRDS(T14_4_gblup_prediction_all, paste0("T14_4_gblup_prediction_all", out_suffix))
saveRDS(T14_4_gfblup_variances_all, paste0("T14_4_gfblup_variances_all", out_suffix))
saveRDS(T14_4_gfblup_prediction_all, paste0("T14_4_gfblup_prediction_all", out_suffix))
saveRDS(T14_4_gfblup_validate_all, paste0("T14_4_gfblup_validate_all", out_suffix))

########################################################################################
# diff --git a/code/using_GO/pla/T14_4_4001_5000.R b/code/using_GO/pla/T14_4_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..5a05ccbea0c20ed31c39514cc0f9c5e4be1cf5f5
# --- /dev/null
# +++ b/code/using_GO/pla/T14_4_4001_5000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg)
{
  is.element(mypkg, installed.packages()[,1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if
# (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}

# ---------------------------------------------------------------------------
# GBLUP / GFBLUP runs for phenotype column T14_4 on GO-term GRMs 4001-5000.
# In each of `cycles` cycles the validation sets stored in
# gblup_validate_all.rds are reused; greml() is fitted with the GRM `G`
# (GBLUP) and once per GO term with the pair GF[k] / rGF[k] (GFBLUP).
# All fits are written to *_4001_5000.rds in the working directory.
# NOTE(review): `G` and `pheno_df_pla` are not created in this script;
# presumably they come from the load()ed .Rdata files - confirm.
# ---------------------------------------------------------------------------
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
########################################################################################
message("Loading data...")
# Input directories; file.path() rebuilds exactly the paths used before.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
markerSets <- readRDS(file.path(go_dir, "markerSets.rds")) #7297 excluding those go terms with zero markers
setsGF <- readRDS(file.path(go_dir, "setsGF.rds")) #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds")) #7297 excluding those go terms with zero markers
nsets <- readRDS(file.path(go_dir, "nsets.rds")) #7297 excluding those go terms with zero markers
nsnps <- readRDS(file.path(go_dir, "nsnps.rds")) #7297 excluding those go terms with zero markers
GF <- readRDS(file.path(go_dir, "GF.rds")) #7297 excluding those go terms with zero markers
rGF <- readRDS(file.path(go_dir, "rGF.rds")) #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Slice of GO terms handled by this script; also used to name the output files.
go_first <- 4001L
go_last <- 5000L
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][go_first:go_last]
rGF_filtered <- rGF[names(go_all_markers_filtered)][go_first:go_last]
########################################################################################
cycles <- 10
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T14_4_gblup_variances_all <- rep(list(list()), cycles)
T14_4_gblup_prediction_all <- rep(list(list()), cycles)
T14_4_gfblup_variances_all <- rep(list(list()), cycles)
T14_4_gfblup_prediction_all <- rep(list(list()), cycles)
T14_4_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T14_4
# Formula kept exactly as written.
# NOTE(review): the response also appears on the right-hand side - confirm
# that model.matrix() returns the intended design for it.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#",r,"..."))
  # Validation sets of cycle r, reshaped to round(n / n_folds) rows.
  # NOTE(review): byrow=TRUE only reproduces the stored folds if they were
  # flattened row-wise - confirm against the script that wrote the file.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
  message ("T14_4...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed before either is stored (same as before); the
  # handler now reports the error instead of assigning to local copies.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G=G), ncores=1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1)
    T14_4_gblup_variances_all[[r]] <- gblup_var
    T14_4_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#",k,"..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # A failed fit leaves the pre-filled empty list in place for this GO term.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores=1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores=1)
    }, error = function(e) {
      message("GFBLUP failed in cycle ", r, ", GO# ", k, ": ", conditionMessage(e))
    })
  }
  T14_4_gfblup_variances_all[[r]] <- gf_var
  T14_4_gfblup_prediction_all[[r]] <- gf_pred
  T14_4_gfblup_validate_all[[r]] <- list(validate)
}

# Same file names as before, e.g. "T14_4_gblup_variances_all_4001_5000.rds".
out_suffix <- sprintf("_%d_%d.rds", go_first, go_last)
saveRDS(T14_4_gblup_variances_all, paste0("T14_4_gblup_variances_all", out_suffix))
saveRDS(T14_4_gblup_prediction_all, paste0("T14_4_gblup_prediction_all", out_suffix))
saveRDS(T14_4_gfblup_variances_all, paste0("T14_4_gfblup_variances_all", out_suffix))
saveRDS(T14_4_gfblup_prediction_all, paste0("T14_4_gfblup_prediction_all", out_suffix))
saveRDS(T14_4_gfblup_validate_all, paste0("T14_4_gfblup_validate_all", out_suffix))

########################################################################################
# diff --git a/code/using_GO/pla/T14_4_5001_6000.R b/code/using_GO/pla/T14_4_5001_6000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..8a992cf578a32d1135af92dde20feeb67a6e44de
# --- /dev/null
# +++ b/code/using_GO/pla/T14_4_5001_6000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg)
{
  is.element(mypkg, installed.packages()[,1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
########################################################################################
message("Loading data...")
MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
# ---------------------------------------------------------------------------
# Remainder of the script for phenotype column T14_4, GO-term GRMs 5001-6000:
# finishes loading the inputs, then in each of `cycles` cycles reuses the
# validation sets stored in gblup_validate_all.rds and fits greml() with the
# GRM `G` (GBLUP) and once per GO term with the pair GF[k] / rGF[k] (GFBLUP).
# All fits are written to *_5001_6000.rds in the working directory.
# NOTE(review): `G` and `pheno_df_pla` are not created in this script;
# presumably they come from the load()ed .Rdata files - confirm.
# ---------------------------------------------------------------------------
# Input directories; file.path() rebuilds exactly the paths used before.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
markerSets <- readRDS(file.path(go_dir, "markerSets.rds")) #7297 excluding those go terms with zero markers
setsGF <- readRDS(file.path(go_dir, "setsGF.rds")) #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds")) #7297 excluding those go terms with zero markers
nsets <- readRDS(file.path(go_dir, "nsets.rds")) #7297 excluding those go terms with zero markers
nsnps <- readRDS(file.path(go_dir, "nsnps.rds")) #7297 excluding those go terms with zero markers
GF <- readRDS(file.path(go_dir, "GF.rds")) #7297 excluding those go terms with zero markers
rGF <- readRDS(file.path(go_dir, "rGF.rds")) #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Slice of GO terms handled by this script; also used to name the output files.
go_first <- 5001L
go_last <- 6000L
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][go_first:go_last]
rGF_filtered <- rGF[names(go_all_markers_filtered)][go_first:go_last]
########################################################################################
cycles <- 10
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T14_4_gblup_variances_all <- rep(list(list()), cycles)
T14_4_gblup_prediction_all <- rep(list(list()), cycles)
T14_4_gfblup_variances_all <- rep(list(list()), cycles)
T14_4_gfblup_prediction_all <- rep(list(list()), cycles)
T14_4_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T14_4
# Formula kept exactly as written.
# NOTE(review): the response also appears on the right-hand side - confirm
# that model.matrix() returns the intended design for it.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#",r,"..."))
  # Validation sets of cycle r, reshaped to round(n / n_folds) rows.
  # NOTE(review): byrow=TRUE only reproduces the stored folds if they were
  # flattened row-wise - confirm against the script that wrote the file.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
  message ("T14_4...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed before either is stored (same as before); the
  # handler now reports the error instead of assigning to local copies.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G=G), ncores=1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1)
    T14_4_gblup_variances_all[[r]] <- gblup_var
    T14_4_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#",k,"..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # A failed fit leaves the pre-filled empty list in place for this GO term.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores=1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores=1)
    }, error = function(e) {
      message("GFBLUP failed in cycle ", r, ", GO# ", k, ": ", conditionMessage(e))
    })
  }
  T14_4_gfblup_variances_all[[r]] <- gf_var
  T14_4_gfblup_prediction_all[[r]] <- gf_pred
  T14_4_gfblup_validate_all[[r]] <- list(validate)
}

# Same file names as before, e.g. "T14_4_gblup_variances_all_5001_6000.rds".
out_suffix <- sprintf("_%d_%d.rds", go_first, go_last)
saveRDS(T14_4_gblup_variances_all, paste0("T14_4_gblup_variances_all", out_suffix))
saveRDS(T14_4_gblup_prediction_all, paste0("T14_4_gblup_prediction_all", out_suffix))
saveRDS(T14_4_gfblup_variances_all, paste0("T14_4_gfblup_variances_all", out_suffix))
saveRDS(T14_4_gfblup_prediction_all, paste0("T14_4_gfblup_prediction_all", out_suffix))
saveRDS(T14_4_gfblup_validate_all, paste0("T14_4_gfblup_validate_all", out_suffix))

########################################################################################
# diff --git a/code/using_GO/pla/T14_4_6001_7297.R b/code/using_GO/pla/T14_4_6001_7297.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..0c0e0e5bd1d532954073512b4cd4a1acb89f760c
# --- /dev/null
# +++ b/code/using_GO/pla/T14_4_6001_7297.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
trim <-
function (x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg)
{
  is.element(mypkg, installed.packages()[,1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}

# ---------------------------------------------------------------------------
# GBLUP / GFBLUP runs for phenotype column T14_4 on GO-term GRMs 6001-7297.
# In each of `cycles` cycles the validation sets stored in
# gblup_validate_all.rds are reused; greml() is fitted with the GRM `G`
# (GBLUP) and once per GO term with the pair GF[k] / rGF[k] (GFBLUP).
# All fits are written to *_6001_7297.rds in the working directory.
# NOTE(review): `G` and `pheno_df_pla` are not created in this script;
# presumably they come from the load()ed .Rdata files - confirm.
# ---------------------------------------------------------------------------
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
########################################################################################
message("Loading data...")
# Input directories; file.path() rebuilds exactly the paths used before.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
markerSets <- readRDS(file.path(go_dir, "markerSets.rds")) #7297 excluding those go terms with zero markers
setsGF <- readRDS(file.path(go_dir, "setsGF.rds")) #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds")) #7297 excluding those go terms with zero markers
nsets <- readRDS(file.path(go_dir, "nsets.rds")) #7297 excluding those go terms with zero markers
nsnps <- readRDS(file.path(go_dir, "nsnps.rds")) #7297 excluding those go terms with zero markers
GF <- readRDS(file.path(go_dir, "GF.rds")) #7297 excluding those go terms with zero markers
rGF <- readRDS(file.path(go_dir, "rGF.rds")) #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Slice of GO terms handled by this script; also used to name the output files.
# NOTE(review): 7297 is taken as the total number of filtered GO terms (see
# the load comments above) - confirm it still equals length(GF) after filtering.
go_first <- 6001L
go_last <- 7297L
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][go_first:go_last]
rGF_filtered <- rGF[names(go_all_markers_filtered)][go_first:go_last]
########################################################################################
cycles <- 10
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T14_4_gblup_variances_all <- rep(list(list()), cycles)
T14_4_gblup_prediction_all <- rep(list(list()), cycles)
T14_4_gfblup_variances_all <- rep(list(list()), cycles)
T14_4_gfblup_prediction_all <- rep(list(list()), cycles)
T14_4_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T14_4
# Formula kept exactly as written.
# NOTE(review): the response also appears on the right-hand side - confirm
# that model.matrix() returns the intended design for it.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#",r,"..."))
  # Validation sets of cycle r, reshaped to round(n / n_folds) rows.
  # NOTE(review): byrow=TRUE only reproduces the stored folds if they were
  # flattened row-wise - confirm against the script that wrote the file.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
  message ("T14_4...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed before either is stored (same as before); the
  # handler now reports the error instead of assigning to local copies.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G=G), ncores=1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1)
    T14_4_gblup_variances_all[[r]] <- gblup_var
    T14_4_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#",k,"..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # A failed fit leaves the pre-filled empty list in place for this GO term.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores=1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores=1)
    }, error = function(e) {
      message("GFBLUP failed in cycle ", r, ", GO# ", k, ": ", conditionMessage(e))
    })
  }
  T14_4_gfblup_variances_all[[r]] <- gf_var
  T14_4_gfblup_prediction_all[[r]] <- gf_pred
  T14_4_gfblup_validate_all[[r]] <- list(validate)
}

# Same file names as before, e.g. "T14_4_gblup_variances_all_6001_7297.rds".
out_suffix <- sprintf("_%d_%d.rds", go_first, go_last)
saveRDS(T14_4_gblup_variances_all, paste0("T14_4_gblup_variances_all", out_suffix))
saveRDS(T14_4_gblup_prediction_all, paste0("T14_4_gblup_prediction_all", out_suffix))
saveRDS(T14_4_gfblup_variances_all, paste0("T14_4_gfblup_variances_all", out_suffix))
saveRDS(T14_4_gfblup_prediction_all, paste0("T14_4_gfblup_prediction_all", out_suffix))
saveRDS(T14_4_gfblup_validate_all, paste0("T14_4_gfblup_validate_all", out_suffix))

########################################################################################
# diff --git a/code/using_GO/pla/T14_5_1001_2000.R b/code/using_GO/pla/T14_5_1001_2000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..6f75e58dd65429aa8858e4d508045d500324bd48
# --- /dev/null
# +++ b/code/using_GO/pla/T14_5_1001_2000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg)
{
  is.element(mypkg, installed.packages()[,1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
#
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
#7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_5_gblup_variances_all=rep(list(list()),cycles) +T14_5_gblup_prediction_all=rep(list(list()),cycles) +T14_5_gfblup_variances_all=rep(list(list()),cycles) +T14_5_gfblup_prediction_all=rep(list(list()),cycles) +T14_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + 
#validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_5...") + y=pheno_df_pla$T14_5 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_5_gblup_variances_all[[r]]<-var + T14_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_5_gblup_variances_all[[r]]<-list() + T14_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_5_gfblup_variances_all[[r]]<-var + T14_5_gfblup_prediction_all[[r]]<-pred + T14_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_5_gblup_variances_all,"T14_5_gblup_variances_all_1001_2000.rds") +saveRDS(T14_5_gblup_prediction_all,"T14_5_gblup_prediction_all_1001_2000.rds") +saveRDS(T14_5_gfblup_variances_all,"T14_5_gfblup_variances_all_1001_2000.rds") +saveRDS(T14_5_gfblup_prediction_all,"T14_5_gfblup_prediction_all_1001_2000.rds") 
+saveRDS(T14_5_gfblup_validate_all,"T14_5_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_5_1_1000.R b/code/using_GO/pla/T14_5_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..61189576089e82f720b5df5220d55861e72749c7 --- /dev/null +++ b/code/using_GO/pla/T14_5_1_1000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") 
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) 
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_5_gblup_variances_all=rep(list(list()),cycles) +T14_5_gblup_prediction_all=rep(list(list()),cycles) +T14_5_gfblup_variances_all=rep(list(list()),cycles) +T14_5_gfblup_prediction_all=rep(list(list()),cycles) +T14_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_5...") + y=pheno_df_pla$T14_5 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_5_gblup_variances_all[[r]]<-var + T14_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_5_gblup_variances_all[[r]]<-list() + T14_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_5_gfblup_variances_all[[r]]<-var + T14_5_gfblup_prediction_all[[r]]<-pred + T14_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_5_gblup_variances_all,"T14_5_gblup_variances_all_1_1000.rds") +saveRDS(T14_5_gblup_prediction_all,"T14_5_gblup_prediction_all_1_1000.rds") +saveRDS(T14_5_gfblup_variances_all,"T14_5_gfblup_variances_all_1_1000.rds") +saveRDS(T14_5_gfblup_prediction_all,"T14_5_gfblup_prediction_all_1_1000.rds") +saveRDS(T14_5_gfblup_validate_all,"T14_5_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_5_2001_3000.R b/code/using_GO/pla/T14_5_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..a5b43288f4d2d2c9ee577cc6c9be4332483ecbb2 --- /dev/null +++ b/code/using_GO/pla/T14_5_2001_3000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, 
installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_5_gblup_variances_all=rep(list(list()),cycles) +T14_5_gblup_prediction_all=rep(list(list()),cycles) +T14_5_gfblup_variances_all=rep(list(list()),cycles) 
+T14_5_gfblup_prediction_all=rep(list(list()),cycles) +T14_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_5...") + y=pheno_df_pla$T14_5 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_5_gblup_variances_all[[r]]<-var + T14_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_5_gblup_variances_all[[r]]<-list() + T14_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_5_gfblup_variances_all[[r]]<-var + T14_5_gfblup_prediction_all[[r]]<-pred + 
T14_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_5_gblup_variances_all,"T14_5_gblup_variances_all_2001_3000.rds") +saveRDS(T14_5_gblup_prediction_all,"T14_5_gblup_prediction_all_2001_3000.rds") +saveRDS(T14_5_gfblup_variances_all,"T14_5_gfblup_variances_all_2001_3000.rds") +saveRDS(T14_5_gfblup_prediction_all,"T14_5_gfblup_prediction_all_2001_3000.rds") +saveRDS(T14_5_gfblup_validate_all,"T14_5_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_5_3001_4000.R b/code/using_GO/pla/T14_5_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..87343d18dbaa1d2a09e49fb108be8f89d6e71951 --- /dev/null +++ b/code/using_GO/pla/T14_5_3001_4000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") 
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero 
markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_5_gblup_variances_all=rep(list(list()),cycles) +T14_5_gblup_prediction_all=rep(list(list()),cycles) +T14_5_gfblup_variances_all=rep(list(list()),cycles) +T14_5_gfblup_prediction_all=rep(list(list()),cycles) +T14_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_5...") + y=pheno_df_pla$T14_5 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
+ result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_5_gblup_variances_all[[r]]<-var + T14_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_5_gblup_variances_all[[r]]<-list() + T14_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_5_gfblup_variances_all[[r]]<-var + T14_5_gfblup_prediction_all[[r]]<-pred + T14_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_5_gblup_variances_all,"T14_5_gblup_variances_all_3001_4000.rds") +saveRDS(T14_5_gblup_prediction_all,"T14_5_gblup_prediction_all_3001_4000.rds") +saveRDS(T14_5_gfblup_variances_all,"T14_5_gfblup_variances_all_3001_4000.rds") +saveRDS(T14_5_gfblup_prediction_all,"T14_5_gfblup_prediction_all_3001_4000.rds") +saveRDS(T14_5_gfblup_validate_all,"T14_5_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_5_4001_5000.R b/code/using_GO/pla/T14_5_4001_5000.R new file mode 100644 index 
0000000000000000000000000000000000000000..a43c6dbb04471baa8fef2b78cd279be0020d44d0 --- /dev/null +++ b/code/using_GO/pla/T14_5_4001_5000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] 
###########################################################################################
# Repeated cross-validation of GBLUP and GFBLUP (one GFBLUP fit per GO term)
# for trait T14_5, GO terms 4001-5000.
cycles <- 10
# Validation folds written by the GBLUP run; entry r is used for cycle r.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the corresponding fit fails.
T14_5_gblup_variances_all <- rep(list(list()), cycles)
T14_5_gblup_prediction_all <- rep(list(list()), cycles)
T14_5_gfblup_variances_all <- rep(list(list()), cycles)
T14_5_gfblup_prediction_all <- rep(list(list()), cycles)
T14_5_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are rebuilt from the stored GBLUP folds. The earlier time-seeded
  # sample() of fresh folds was removed because its result was never used.
  # NOTE(review): byrow = TRUE reproduces the stored fold matrix only if it was
  # flattened row-wise -- confirm against the script that wrote the file.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T14_5...")
  y <- pheno_df_pla$T14_5
  # NOTE(review): `1*y` repeats the response on the right-hand side; presumably
  # an intercept-only design (y ~ 1) was intended -- confirm before changing.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The fits are returned from tryCatch() and stored afterwards: assignments made
  # inside an error handler only modify the handler's own local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T14_5_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T14_5_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Relationship matrices passed to greml() for GO term k: the k-th entry of
    # GF_filtered followed by the k-th entry of rGF_filtered.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # If either fit fails, both entries for this term are left empty.
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  T14_5_gfblup_variances_all[[r]] <- gf_var
  T14_5_gfblup_prediction_all[[r]] <- gf_pred
  T14_5_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T14_5_gblup_variances_all, "T14_5_gblup_variances_all_4001_5000.rds")
saveRDS(T14_5_gblup_prediction_all, "T14_5_gblup_prediction_all_4001_5000.rds")
saveRDS(T14_5_gfblup_variances_all, "T14_5_gfblup_variances_all_4001_5000.rds")
saveRDS(T14_5_gfblup_prediction_all, "T14_5_gfblup_prediction_all_4001_5000.rds")
saveRDS(T14_5_gfblup_validate_all, "T14_5_gfblup_validate_all_4001_5000.rds")

###########################################################################################
# Patch metadata for the next script, preserved verbatim:
#   diff --git a/code/using_GO/pla/T14_5_5001_6000.R b/code/using_GO/pla/T14_5_5001_6000.R
#   new file mode 100644
#   index 0000000000000000000000000000000000000000..d6df8951a6b01a39ccc4ce7869c0828d326af3a1
#   --- /dev/null
#   +++ b/code/using_GO/pla/T14_5_5001_6000.R
#   @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)

# TRUE when `mypkg` appears in the first column of installed.packages().
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Relative output paths used by the saveRDS() calls of this script resolve here.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Disabled one-off installation steps, kept for reference:
# if (!is.installed("backports"))
# {
#   install.packages("backports")
# }
# if (!is.installed("devtools"))
# {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg"))
# {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
# Remaining GO inputs: 7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# filter: keep GO terms with at least one marker, then take this job's slice
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]
###########################################################################################
# Repeated cross-validation of GBLUP and GFBLUP (one GFBLUP fit per GO term)
# for trait T14_5, GO terms 5001-6000.
cycles <- 10
# Validation folds written by the GBLUP run; entry r is used for cycle r.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the corresponding fit fails.
T14_5_gblup_variances_all <- rep(list(list()), cycles)
T14_5_gblup_prediction_all <- rep(list(list()), cycles)
T14_5_gfblup_variances_all <- rep(list(list()), cycles)
T14_5_gfblup_prediction_all <- rep(list(list()), cycles)
T14_5_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are rebuilt from the stored GBLUP folds. The earlier time-seeded
  # sample() of fresh folds was removed because its result was never used.
  # NOTE(review): byrow = TRUE reproduces the stored fold matrix only if it was
  # flattened row-wise -- confirm against the script that wrote the file.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T14_5...")
  y <- pheno_df_pla$T14_5
  # NOTE(review): `1*y` repeats the response on the right-hand side; presumably
  # an intercept-only design (y ~ 1) was intended -- confirm before changing.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The fits are returned from tryCatch() and stored afterwards: assignments made
  # inside an error handler only modify the handler's own local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T14_5_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T14_5_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Relationship matrices passed to greml() for GO term k: the k-th entry of
    # GF_filtered followed by the k-th entry of rGF_filtered.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # If either fit fails, both entries for this term are left empty.
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  T14_5_gfblup_variances_all[[r]] <- gf_var
  T14_5_gfblup_prediction_all[[r]] <- gf_pred
  T14_5_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T14_5_gblup_variances_all, "T14_5_gblup_variances_all_5001_6000.rds")
saveRDS(T14_5_gblup_prediction_all, "T14_5_gblup_prediction_all_5001_6000.rds")
# Remaining result files of the 5001-6000 job (written relative to the working directory).
saveRDS(T14_5_gfblup_variances_all, file = "T14_5_gfblup_variances_all_5001_6000.rds")
saveRDS(T14_5_gfblup_prediction_all, file = "T14_5_gfblup_prediction_all_5001_6000.rds")
saveRDS(T14_5_gfblup_validate_all, file = "T14_5_gfblup_validate_all_5001_6000.rds")

###########################################################################################
# Patch metadata for the next script, preserved verbatim:
#   diff --git a/code/using_GO/pla/T14_5_6001_7297.R b/code/using_GO/pla/T14_5_6001_7297.R
#   new file mode 100644
#   index 0000000000000000000000000000000000000000..43081415ad495fb04830106caba96cf7f9e3967a
#   --- /dev/null
#   +++ b/code/using_GO/pla/T14_5_6001_7297.R
#   @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)

# TRUE when `mypkg` appears in the first column of installed.packages().
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

# Relative output paths used by the saveRDS() calls of this script resolve here.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Disabled one-off installation steps, kept for reference:
# if (!is.installed("backports"))
# {
#   install.packages("backports")
# }
# if (!is.installed("devtools"))
# {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg"))
# {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
message("Loading data...")
# load() restores whatever objects each .Rdata file holds; presumably this is
# where `G` and `pheno_df_pla`, used further down, come from.
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
# 7297 excluding those go terms with zero markers
########################################################################################
# filter: keep GO terms with at least one marker, then take this job's slice
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[6001:7297]
rGF_filtered <- rGF_filtered[6001:7297]
###########################################################################################
# Repeated cross-validation of GBLUP and GFBLUP (one GFBLUP fit per GO term)
# for trait T14_5, GO terms 6001-7297.
cycles <- 10
# Validation folds written by the GBLUP run; entry r is used for cycle r.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the corresponding fit fails.
T14_5_gblup_variances_all <- rep(list(list()), cycles)
T14_5_gblup_prediction_all <- rep(list(list()), cycles)
T14_5_gfblup_variances_all <- rep(list(list()), cycles)
T14_5_gfblup_prediction_all <- rep(list(list()), cycles)
T14_5_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are rebuilt from the stored GBLUP folds. The earlier time-seeded
  # sample() of fresh folds was removed because its result was never used.
  # NOTE(review): byrow = TRUE reproduces the stored fold matrix only if it was
  # flattened row-wise -- confirm against the script that wrote the file.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T14_5...")
  y <- pheno_df_pla$T14_5
  # NOTE(review): `1*y` repeats the response on the right-hand side; presumably
  # an intercept-only design (y ~ 1) was intended -- confirm before changing.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The fits are returned from tryCatch() and stored afterwards: assignments made
  # inside an error handler only modify the handler's own local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T14_5_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T14_5_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Relationship matrices passed to greml() for GO term k: the k-th entry of
    # GF_filtered followed by the k-th entry of rGF_filtered.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # If either fit fails, both entries for this term are left empty.
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  T14_5_gfblup_variances_all[[r]] <- gf_var
  T14_5_gfblup_prediction_all[[r]] <- gf_pred
  T14_5_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T14_5_gblup_variances_all, "T14_5_gblup_variances_all_6001_7297.rds")
saveRDS(T14_5_gblup_prediction_all, "T14_5_gblup_prediction_all_6001_7297.rds")
saveRDS(T14_5_gfblup_variances_all, "T14_5_gfblup_variances_all_6001_7297.rds")
saveRDS(T14_5_gfblup_prediction_all, "T14_5_gfblup_prediction_all_6001_7297.rds")
saveRDS(T14_5_gfblup_validate_all, "T14_5_gfblup_validate_all_6001_7297.rds")

###########################################################################################
# Patch metadata for the next script, preserved verbatim:
#   diff --git a/code/using_GO/pla/T14_6_1001_2000.R b/code/using_GO/pla/T14_6_1001_2000.R
#   new file mode 100644
#   index 0000000000000000000000000000000000000000..615caa4c37d841cd10ac7212aceef64b0047bb33
#   --- /dev/null
#   +++ b/code/using_GO/pla/T14_6_1001_2000.R
#   @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)

# TRUE when `mypkg` appears in the first column of installed.packages().
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Relative output paths used by the saveRDS() calls of this script resolve here.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Disabled one-off installation steps, kept for reference:
# if (!is.installed("backports"))
# {
#   install.packages("backports")
# }
# if (!is.installed("devtools"))
# {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg"))
# {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term inputs for the T14_6 job on terms 1001-2000.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
# The objects below: 7297 excluding those go terms with zero markers
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# filter: keep GO terms with at least one marker, then take this job's slice
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][1001:2000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][1001:2000]
###########################################################################################
# Number of cross-validation cycles.
cycles <- 10
# Repeated cross-validation of GBLUP and GFBLUP (one GFBLUP fit per GO term)
# for trait T14_6, GO terms 1001-2000.
# Validation folds written by the GBLUP run; entry r is used for cycle r.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the corresponding fit fails.
T14_6_gblup_variances_all <- rep(list(list()), cycles)
T14_6_gblup_prediction_all <- rep(list(list()), cycles)
T14_6_gfblup_variances_all <- rep(list(list()), cycles)
T14_6_gfblup_prediction_all <- rep(list(list()), cycles)
T14_6_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are rebuilt from the stored GBLUP folds. The earlier time-seeded
  # sample() of fresh folds was removed because its result was never used.
  # NOTE(review): byrow = TRUE reproduces the stored fold matrix only if it was
  # flattened row-wise -- confirm against the script that wrote the file.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T14_6...")
  y <- pheno_df_pla$T14_6
  # NOTE(review): `1*y` repeats the response on the right-hand side; presumably
  # an intercept-only design (y ~ 1) was intended -- confirm before changing.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The fits are returned from tryCatch() and stored afterwards: assignments made
  # inside an error handler only modify the handler's own local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T14_6_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T14_6_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Relationship matrices passed to greml() for GO term k: the k-th entry of
    # GF_filtered followed by the k-th entry of rGF_filtered.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # If either fit fails, both entries for this term are left empty.
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  T14_6_gfblup_variances_all[[r]] <- gf_var
  T14_6_gfblup_prediction_all[[r]] <- gf_pred
  T14_6_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T14_6_gblup_variances_all, "T14_6_gblup_variances_all_1001_2000.rds")
saveRDS(T14_6_gblup_prediction_all, "T14_6_gblup_prediction_all_1001_2000.rds")
saveRDS(T14_6_gfblup_variances_all, "T14_6_gfblup_variances_all_1001_2000.rds")
saveRDS(T14_6_gfblup_prediction_all, "T14_6_gfblup_prediction_all_1001_2000.rds")
saveRDS(T14_6_gfblup_validate_all, "T14_6_gfblup_validate_all_1001_2000.rds")

###########################################################################################
# Patch metadata for the next script, preserved verbatim:
#   diff --git a/code/using_GO/pla/T14_6_1_1000.R b/code/using_GO/pla/T14_6_1_1000.R
#   new file mode 100644
#   index 0000000000000000000000000000000000000000..49aa03e097389e9d28c7cd63695cc5dfd0a98233
#   --- /dev/null
#   +++ b/code/using_GO/pla/T14_6_1_1000.R
#   @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)

# TRUE when `mypkg` appears in the first column of installed.packages().
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Relative output paths used by the saveRDS() calls of this script resolve here.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Disabled one-off installation steps, kept for reference:
#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
# The objects below: 7297 excluding those go terms with zero markers
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# filter: keep GO terms with at least one marker, then take this job's slice
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]
###########################################################################################
# Repeated cross-validation of GBLUP and GFBLUP (one GFBLUP fit per GO term)
# for trait T14_6, GO terms 1-1000.
cycles <- 10
# Validation folds written by the GBLUP run; entry r is used for cycle r.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the corresponding fit fails.
T14_6_gblup_variances_all <- rep(list(list()), cycles)
T14_6_gblup_prediction_all <- rep(list(list()), cycles)
T14_6_gfblup_variances_all <- rep(list(list()), cycles)
T14_6_gfblup_prediction_all <- rep(list(list()), cycles)
T14_6_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are rebuilt from the stored GBLUP folds. The earlier time-seeded
  # sample() of fresh folds was removed because its result was never used.
  # NOTE(review): byrow = TRUE reproduces the stored fold matrix only if it was
  # flattened row-wise -- confirm against the script that wrote the file.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T14_6...")
  y <- pheno_df_pla$T14_6
  # NOTE(review): `1*y` repeats the response on the right-hand side; presumably
  # an intercept-only design (y ~ 1) was intended -- confirm before changing.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The fits are returned from tryCatch() and stored afterwards: assignments made
  # inside an error handler only modify the handler's own local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T14_6_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T14_6_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Relationship matrices passed to greml() for GO term k: the k-th entry of
    # GF_filtered followed by the k-th entry of rGF_filtered.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # If either fit fails, both entries for this term are left empty.
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  T14_6_gfblup_variances_all[[r]] <- gf_var
  T14_6_gfblup_prediction_all[[r]] <- gf_pred
  T14_6_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T14_6_gblup_variances_all, "T14_6_gblup_variances_all_1_1000.rds")
saveRDS(T14_6_gblup_prediction_all, "T14_6_gblup_prediction_all_1_1000.rds")
saveRDS(T14_6_gfblup_variances_all, "T14_6_gfblup_variances_all_1_1000.rds")
saveRDS(T14_6_gfblup_prediction_all, "T14_6_gfblup_prediction_all_1_1000.rds")
saveRDS(T14_6_gfblup_validate_all, "T14_6_gfblup_validate_all_1_1000.rds")

+################################################################################################################################## diff --git a/code/using_GO/pla/T14_6_2001_3000.R b/code/using_GO/pla/T14_6_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..c6cbc4c2f092927d9cddc46bfae3041b020d3bcb --- /dev/null +++ b/code/using_GO/pla/T14_6_2001_3000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") 
# ---- Remaining genotype / phenotype inputs ---------------------------------
# The .Rdata files restore whatever objects were saved into them; the load
# order is kept as in the original script.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO-term priors ---------------------------------------------------------
# Per-GO-term objects; the original notes record 7432 entries for these three.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# The original notes record 7297 entries for the objects below
# (GO terms with zero markers excluded).
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter -----------------------------------------------------------------
# Number of GO terms with at least one marker, and the marker sets of those
# terms; the feature GRMs are then picked by GO-term name.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_6_gblup_variances_all=rep(list(list()),cycles) +T14_6_gblup_prediction_all=rep(list(list()),cycles) +T14_6_gfblup_variances_all=rep(list(list()),cycles) +T14_6_gfblup_prediction_all=rep(list(list()),cycles) +T14_6_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_6...") + y=pheno_df_pla$T14_6 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_6_gblup_variances_all[[r]]<-var + T14_6_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_6_gblup_variances_all[[r]]<-list() + T14_6_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred 
<-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_6_gfblup_variances_all[[r]]<-var + T14_6_gfblup_prediction_all[[r]]<-pred + T14_6_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_6_gblup_variances_all,"T14_6_gblup_variances_all_2001_3000.rds") +saveRDS(T14_6_gblup_prediction_all,"T14_6_gblup_prediction_all_2001_3000.rds") +saveRDS(T14_6_gfblup_variances_all,"T14_6_gfblup_variances_all_2001_3000.rds") +saveRDS(T14_6_gfblup_prediction_all,"T14_6_gfblup_prediction_all_2001_3000.rds") +saveRDS(T14_6_gfblup_validate_all,"T14_6_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_6_3001_4000.R b/code/using_GO/pla/T14_6_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..6fa0004adef89de829fac2c71fa2182f0c65c2b2 --- /dev/null +++ b/code/using_GO/pla/T14_6_3001_4000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + 
# T14_6, GO terms 3001-4000: GBLUP and GFBLUP variance components plus
# cross-validated predictions, repeated over `cycles` stored fold sets.
# Results are collected in the T14_6_* lists that the script saves afterwards
# (relative paths, hence the setwd() below).
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-off installation steps, kept for reference.
#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data -------------------------------------------------------------------
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# The .Rdata files restore the objects saved in them; `G` and `pheno_df_pla`
# used below are presumably among those -- TODO confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers

# ---- Filter -----------------------------------------------------------------
# Keep the GO terms that have at least one marker, pick their feature GRMs by
# name, then take the slice of terms handled by this job.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]

# ---- Cross-validation setup -------------------------------------------------
cycles <- 10
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8
# Fail before any model is fitted if a stored fold set is missing.
stopifnot(length(gblup_validate) >= cycles)

T14_6_gblup_variances_all <- rep(list(list()), cycles)
T14_6_gblup_prediction_all <- rep(list(list()), cycles)
T14_6_gfblup_variances_all <- rep(list(list()), cycles)
T14_6_gfblup_prediction_all <- rep(list(list()), cycles)
T14_6_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T14_6
# NOTE(review): `y ~ 1*y` puts the response on the right-hand side; presumably
# an intercept-only design is intended -- confirm before simplifying.
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)
fold_size <- round(nrow(pheno_df_pla) / n_folds)

# Fit the variance-component model and its cross-validated counterpart for one
# set of GRMs. Returns list(var = , pred = ); when either fit fails the error
# is reported and BOTH entries are empty lists. The value is returned rather
# than assigned inside the handler, because assignments made inside an error
# handler function never reach the caller's variables.
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("greml failed for ", label, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

for (r in seq_len(cycles)) {
  message(paste("cycle#",r,"..."))
  # Reuse the fold assignment stored by the GBLUP run for this cycle, so no
  # random folds are drawn here.
  # NOTE(review): the stored object is flattened and refilled row-wise; whether
  # that reproduces the original fold columns depends on how it was saved.
  validate <- matrix(unlist(gblup_validate[[r]]), nrow = fold_size, byrow = TRUE)

  message ("T14_6...")

  # ---- GBLUP: single genomic relationship matrix ----
  gblup <- fit_greml_pair(
    y, X,
    GRM = list(G = G),
    validate = validate,
    label = paste0("GBLUP, cycle ", r)
  )
  # `[r] <- list(.)` cannot drop the slot, whatever the fit returned.
  T14_6_gblup_variances_all[r] <- list(gblup[["var"]])
  T14_6_gblup_prediction_all[r] <- list(gblup[["pred"]])

  # ---- GFBLUP: one (feature GRM, remaining GRM) pair per GO term ----
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#",k,"..."))
    gfblup <- fit_greml_pair(
      y, X,
      GRM = c(GF_filtered[k], rGF_filtered[k]),
      validate = validate,
      label = paste0("GO set ", names(GF_filtered)[k], ", cycle ", r)
    )
    gfblup_var[k] <- list(gfblup[["var"]])
    gfblup_pred[k] <- list(gfblup[["pred"]])
  }
  T14_6_gfblup_variances_all[[r]] <- gfblup_var
  T14_6_gfblup_prediction_all[[r]] <- gfblup_pred
  T14_6_gfblup_validate_all[[r]] <- list(validate)
}
+saveRDS(T14_6_gblup_variances_all,"T14_6_gblup_variances_all_3001_4000.rds") +saveRDS(T14_6_gblup_prediction_all,"T14_6_gblup_prediction_all_3001_4000.rds") +saveRDS(T14_6_gfblup_variances_all,"T14_6_gfblup_variances_all_3001_4000.rds") +saveRDS(T14_6_gfblup_prediction_all,"T14_6_gfblup_prediction_all_3001_4000.rds") +saveRDS(T14_6_gfblup_validate_all,"T14_6_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_6_4001_5000.R b/code/using_GO/pla/T14_6_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..5ff2b569bc4e31a85ddebd5d2a1936aa37a899dc --- /dev/null +++ b/code/using_GO/pla/T14_6_4001_5000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") 
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with 
zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_6_gblup_variances_all=rep(list(list()),cycles) +T14_6_gblup_prediction_all=rep(list(list()),cycles) +T14_6_gfblup_variances_all=rep(list(list()),cycles) +T14_6_gfblup_prediction_all=rep(list(list()),cycles) +T14_6_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_6...") + y=pheno_df_pla$T14_6 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), 
validate = validate,ncores=1) + T14_6_gblup_variances_all[[r]]<-var + T14_6_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_6_gblup_variances_all[[r]]<-list() + T14_6_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_6_gfblup_variances_all[[r]]<-var + T14_6_gfblup_prediction_all[[r]]<-pred + T14_6_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_6_gblup_variances_all,"T14_6_gblup_variances_all_4001_5000.rds") +saveRDS(T14_6_gblup_prediction_all,"T14_6_gblup_prediction_all_4001_5000.rds") +saveRDS(T14_6_gfblup_variances_all,"T14_6_gfblup_variances_all_4001_5000.rds") +saveRDS(T14_6_gfblup_prediction_all,"T14_6_gfblup_prediction_all_4001_5000.rds") +saveRDS(T14_6_gfblup_validate_all,"T14_6_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_6_5001_6000.R b/code/using_GO/pla/T14_6_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..e9bc5e08a29d5b87419786d9668cebe24813dfab --- /dev/null +++ b/code/using_GO/pla/T14_6_5001_6000.R @@ -0,0 +1,130 @@ +############################# 
FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") 
# ---- GO-term priors ---------------------------------------------------------
# Per-GO-term objects; the original notes record 7432 entries for these three.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# The original notes record 7297 entries for the objects below
# (GO terms with zero markers excluded).
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter -----------------------------------------------------------------
# Count and keep the GO terms that have at least one marker, select their
# feature / remaining GRMs by GO-term name, and take this job's slice
# (positions 5001 to 6000 of the filtered lists).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][5001:6000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][5001:6000]

# ---- Cross-validation setup -------------------------------------------------
# Number of repeated cross-validation cycles run by the loop further down.
cycles <- 10
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_6_gblup_variances_all=rep(list(list()),cycles) +T14_6_gblup_prediction_all=rep(list(list()),cycles) +T14_6_gfblup_variances_all=rep(list(list()),cycles) +T14_6_gfblup_prediction_all=rep(list(list()),cycles) +T14_6_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_6...") + y=pheno_df_pla$T14_6 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_6_gblup_variances_all[[r]]<-var + T14_6_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_6_gblup_variances_all[[r]]<-list() + T14_6_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_6_gfblup_variances_all[[r]]<-var + T14_6_gfblup_prediction_all[[r]]<-pred + T14_6_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_6_gblup_variances_all,"T14_6_gblup_variances_all_5001_6000.rds") +saveRDS(T14_6_gblup_prediction_all,"T14_6_gblup_prediction_all_5001_6000.rds") +saveRDS(T14_6_gfblup_variances_all,"T14_6_gfblup_variances_all_5001_6000.rds") +saveRDS(T14_6_gfblup_prediction_all,"T14_6_gfblup_prediction_all_5001_6000.rds") +saveRDS(T14_6_gfblup_validate_all,"T14_6_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_6_6001_7297.R b/code/using_GO/pla/T14_6_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..c3c71cc3cc259707dbc0b93d77475f94f22ed272 --- /dev/null +++ b/code/using_GO/pla/T14_6_6001_7297.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# 
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
#7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[6001:7297] +rGF_filtered<-rGF_filtered[6001:7297] +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_6_gblup_variances_all=rep(list(list()),cycles) +T14_6_gblup_prediction_all=rep(list(list()),cycles) +T14_6_gfblup_variances_all=rep(list(list()),cycles) +T14_6_gfblup_prediction_all=rep(list(list()),cycles) +T14_6_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + 
#validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_6...") + y=pheno_df_pla$T14_6 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_6_gblup_variances_all[[r]]<-var + T14_6_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_6_gblup_variances_all[[r]]<-list() + T14_6_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_6_gfblup_variances_all[[r]]<-var + T14_6_gfblup_prediction_all[[r]]<-pred + T14_6_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_6_gblup_variances_all,"T14_6_gblup_variances_all_6001_7297.rds") +saveRDS(T14_6_gblup_prediction_all,"T14_6_gblup_prediction_all_6001_7297.rds") +saveRDS(T14_6_gfblup_variances_all,"T14_6_gfblup_variances_all_6001_7297.rds") +saveRDS(T14_6_gfblup_prediction_all,"T14_6_gfblup_prediction_all_6001_7297.rds") 
+saveRDS(T14_6_gfblup_validate_all,"T14_6_gfblup_validate_all_6001_7297.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T14_7_1001_2000.R b/code/using_GO/pla/T14_7_1001_2000.R
new file mode 100644
index 0000000000000000000000000000000000000000..fb02a3aae488769455d384beede1a927a2df812f
--- /dev/null
+++ b/code/using_GO/pla/T14_7_1001_2000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[1001:2000]
+rGF_filtered<-rGF_filtered[1001:2000]
+###########################################################################################
+cycles=10 # number of repeated cross-validation cycles
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") # per-cycle validation sets, indexed by r below
+n_folds <- 8
+
+T14_7_gblup_variances_all=rep(list(list()),cycles)
+T14_7_gblup_prediction_all=rep(list(list()),cycles)
+T14_7_gfblup_variances_all=rep(list(list()),cycles)
+T14_7_gfblup_prediction_all=rep(list(list()),cycles)
+T14_7_gfblup_validate_all=rep(list(list()),cycles)
+
+# Each cycle fits the G-only model (GBLUP), then one two-GRM model (GF[k] + rGF[k], GFBLUP) per element of GF_filtered.
+for(r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  a=as.numeric(Sys.time())
+  set.seed(a) #generate random folds sets everytime
+  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) # NOTE(review): folds_index is not read anywhere below
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  # The validation sets are not drawn from folds_index any more: `validate` above is
+  # rebuilt from the stored sets of cycle r (the old per-fold fill loop was empty).
+  # NOTE(review): unlist() followed by byrow=TRUE fills the matrix row by row - confirm
+  # this matches the layout in which gblup_validate_all.rds was written.
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T14_7...")
+  y=pheno_df_pla$T14_7
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  result = tryCatch({
+    var <- greml(y = y, X = X, GRM = list(G=G),ncores=1)
+    pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1)
+    T14_7_gblup_variances_all[[r]]<-var
+    T14_7_gblup_prediction_all[[r]]<-pred
+  }, error = function(e) {
+    # Slot r keeps its preallocated list(); assigning here would only change a handler-local copy.
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  }) #try catch ends
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k])
+    fit_k <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_k,ncores=1),
+           pred = greml(y = y, X = X, GRM = grm_k, validate = validate,ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    }) #try catch ends
+    var[[k]] <- fit_k$var
+    pred[[k]] <- fit_k$pred
+  }
+  T14_7_gfblup_variances_all[[r]]<-var
+  T14_7_gfblup_prediction_all[[r]]<-pred
+  T14_7_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T14_7_gblup_variances_all,"T14_7_gblup_variances_all_1001_2000.rds")
+saveRDS(T14_7_gblup_prediction_all,"T14_7_gblup_prediction_all_1001_2000.rds")
+saveRDS(T14_7_gfblup_variances_all,"T14_7_gfblup_variances_all_1001_2000.rds")
+saveRDS(T14_7_gfblup_prediction_all,"T14_7_gfblup_prediction_all_1001_2000.rds")
+saveRDS(T14_7_gfblup_validate_all,"T14_7_gfblup_validate_all_1001_2000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T14_7_1_1000.R b/code/using_GO/pla/T14_7_1_1000.R
new file mode 100644
index 0000000000000000000000000000000000000000..56d9307e51aa3fe2065725760005c5d5ace1a5e9
--- /dev/null
+++ b/code/using_GO/pla/T14_7_1_1000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+#
Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[1:1000]
+rGF_filtered<-rGF_filtered[1:1000]
+###########################################################################################
+cycles=10 # number of repeated cross-validation cycles
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") # per-cycle validation sets, indexed by r below
+n_folds <- 8
+
+T14_7_gblup_variances_all=rep(list(list()),cycles)
+T14_7_gblup_prediction_all=rep(list(list()),cycles)
+T14_7_gfblup_variances_all=rep(list(list()),cycles)
+T14_7_gfblup_prediction_all=rep(list(list()),cycles)
+T14_7_gfblup_validate_all=rep(list(list()),cycles)
+
+# Each cycle fits the G-only model (GBLUP), then one two-GRM model (GF[k] + rGF[k], GFBLUP) per element of GF_filtered.
+for(r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  a=as.numeric(Sys.time())
+  set.seed(a) #generate random folds sets everytime
+  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) # NOTE(review): folds_index is not read anywhere below
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  # The validation sets are not drawn from folds_index any more: `validate` above is
+  # rebuilt from the stored sets of cycle r (the old per-fold fill loop was empty).
+  # NOTE(review): unlist() followed by byrow=TRUE fills the matrix row by row - confirm
+  # this matches the layout in which gblup_validate_all.rds was written.
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T14_7...")
+  y=pheno_df_pla$T14_7
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  result = tryCatch({
+    var <- greml(y = y, X = X, GRM = list(G=G),ncores=1)
+    pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1)
+    T14_7_gblup_variances_all[[r]]<-var
+    T14_7_gblup_prediction_all[[r]]<-pred
+  }, error = function(e) {
+    # Slot r keeps its preallocated list(); assigning here would only change a handler-local copy.
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  }) #try catch ends
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k])
+    fit_k <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_k,ncores=1),
+           pred = greml(y = y, X = X, GRM = grm_k, validate = validate,ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    }) #try catch ends
+    var[[k]] <- fit_k$var
+    pred[[k]] <- fit_k$pred
+  }
+  T14_7_gfblup_variances_all[[r]]<-var
+  T14_7_gfblup_prediction_all[[r]]<-pred
+  T14_7_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T14_7_gblup_variances_all,"T14_7_gblup_variances_all_1_1000.rds")
+saveRDS(T14_7_gblup_prediction_all,"T14_7_gblup_prediction_all_1_1000.rds")
+saveRDS(T14_7_gfblup_variances_all,"T14_7_gfblup_variances_all_1_1000.rds")
+saveRDS(T14_7_gfblup_prediction_all,"T14_7_gfblup_prediction_all_1_1000.rds")
+saveRDS(T14_7_gfblup_validate_all,"T14_7_gfblup_validate_all_1_1000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T14_7_2001_3000.R b/code/using_GO/pla/T14_7_2001_3000.R
new file mode 100644
index 0000000000000000000000000000000000000000..ec3844ffe386cc6ccf952406dd96d0ca6c82dbc0
--- /dev/null
+++ b/code/using_GO/pla/T14_7_2001_3000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[2001:3000]
+rGF_filtered<-rGF_filtered[2001:3000]
+###########################################################################################
+cycles=10 # number of repeated cross-validation cycles
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") # per-cycle validation sets, indexed by r below
+n_folds <- 8
+
+T14_7_gblup_variances_all=rep(list(list()),cycles)
+T14_7_gblup_prediction_all=rep(list(list()),cycles)
+T14_7_gfblup_variances_all=rep(list(list()),cycles)
+T14_7_gfblup_prediction_all=rep(list(list()),cycles)
+T14_7_gfblup_validate_all=rep(list(list()),cycles)
+
+# Each cycle fits the G-only model (GBLUP), then one two-GRM model (GF[k] + rGF[k], GFBLUP) per element of GF_filtered.
+for(r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  a=as.numeric(Sys.time())
+  set.seed(a) #generate random folds sets everytime
+  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) # NOTE(review): folds_index is not read anywhere below
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  # The validation sets are not drawn from folds_index any more: `validate` above is
+  # rebuilt from the stored sets of cycle r (the old per-fold fill loop was empty).
+  # NOTE(review): unlist() followed by byrow=TRUE fills the matrix row by row - confirm
+  # this matches the layout in which gblup_validate_all.rds was written.
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T14_7...")
+  y=pheno_df_pla$T14_7
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  result = tryCatch({
+    var <- greml(y = y, X = X, GRM = list(G=G),ncores=1)
+    pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1)
+    T14_7_gblup_variances_all[[r]]<-var
+    T14_7_gblup_prediction_all[[r]]<-pred
+  }, error = function(e) {
+    # Slot r keeps its preallocated list(); assigning here would only change a handler-local copy.
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  }) #try catch ends
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k])
+    fit_k <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_k,ncores=1),
+           pred = greml(y = y, X = X, GRM = grm_k, validate = validate,ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    }) #try catch ends
+    var[[k]] <- fit_k$var
+    pred[[k]] <- fit_k$pred
+  }
+  T14_7_gfblup_variances_all[[r]]<-var
+  T14_7_gfblup_prediction_all[[r]]<-pred
+  T14_7_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T14_7_gblup_variances_all,"T14_7_gblup_variances_all_2001_3000.rds")
+saveRDS(T14_7_gblup_prediction_all,"T14_7_gblup_prediction_all_2001_3000.rds")
+saveRDS(T14_7_gfblup_variances_all,"T14_7_gfblup_variances_all_2001_3000.rds")
+saveRDS(T14_7_gfblup_prediction_all,"T14_7_gfblup_prediction_all_2001_3000.rds")
+saveRDS(T14_7_gfblup_validate_all,"T14_7_gfblup_validate_all_2001_3000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T14_7_3001_4000.R
b/code/using_GO/pla/T14_7_3001_4000.R
new file mode 100644
index 0000000000000000000000000000000000000000..59ca235e6bff3d87006908c070f4e39135c89bf6
--- /dev/null
+++ b/code/using_GO/pla/T14_7_3001_4000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[3001:4000]
+rGF_filtered<-rGF_filtered[3001:4000]
+###########################################################################################
+cycles=10 # number of repeated cross-validation cycles
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") # per-cycle validation sets, indexed by r below
+n_folds <- 8
+
+T14_7_gblup_variances_all=rep(list(list()),cycles)
+T14_7_gblup_prediction_all=rep(list(list()),cycles)
+T14_7_gfblup_variances_all=rep(list(list()),cycles)
+T14_7_gfblup_prediction_all=rep(list(list()),cycles)
+T14_7_gfblup_validate_all=rep(list(list()),cycles)
+
+# Each cycle fits the G-only model (GBLUP), then one two-GRM model (GF[k] + rGF[k], GFBLUP) per element of GF_filtered.
+for(r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  a=as.numeric(Sys.time())
+  set.seed(a) #generate random folds sets everytime
+  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) # NOTE(review): folds_index is not read anywhere below
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  # The validation sets are not drawn from folds_index any more: `validate` above is
+  # rebuilt from the stored sets of cycle r (the old per-fold fill loop was empty).
+  # NOTE(review): unlist() followed by byrow=TRUE fills the matrix row by row - confirm
+  # this matches the layout in which gblup_validate_all.rds was written.
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T14_7...")
+  y=pheno_df_pla$T14_7
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  result = tryCatch({
+    var <- greml(y = y, X = X, GRM = list(G=G),ncores=1)
+    pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1)
+    T14_7_gblup_variances_all[[r]]<-var
+    T14_7_gblup_prediction_all[[r]]<-pred
+  }, error = function(e) {
+    # Slot r keeps its preallocated list(); assigning here would only change a handler-local copy.
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  }) #try catch ends
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k])
+    fit_k <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_k,ncores=1),
+           pred = greml(y = y, X = X, GRM = grm_k, validate = validate,ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    }) #try catch ends
+    var[[k]] <- fit_k$var
+    pred[[k]] <- fit_k$pred
+  }
+  T14_7_gfblup_variances_all[[r]]<-var
+  T14_7_gfblup_prediction_all[[r]]<-pred
+  T14_7_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T14_7_gblup_variances_all,"T14_7_gblup_variances_all_3001_4000.rds")
+saveRDS(T14_7_gblup_prediction_all,"T14_7_gblup_prediction_all_3001_4000.rds")
+saveRDS(T14_7_gfblup_variances_all,"T14_7_gfblup_variances_all_3001_4000.rds")
+saveRDS(T14_7_gfblup_prediction_all,"T14_7_gfblup_prediction_all_3001_4000.rds")
+saveRDS(T14_7_gfblup_validate_all,"T14_7_gfblup_validate_all_3001_4000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T14_7_4001_5000.R b/code/using_GO/pla/T14_7_4001_5000.R
new file mode 100644
index 0000000000000000000000000000000000000000..a9742f2cba1db8a960a5c23c1998c82d62c870d6
--- /dev/null
+++ b/code/using_GO/pla/T14_7_4001_5000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if
# (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
# (End of the disabled package-bootstrap code.)

# -----------------------------------------------------------------------------
# GBLUP vs. GFBLUP for phenotype column T14_7, GO-term GRMs 4001..5000.
#
# For each of `cycles` repetitions the script fits
#   * GBLUP : greml() with the single relationship matrix `G`
#   * GFBLUP: greml() with the pair (GF[k], rGF[k]) for every selected GO term
# each once without and once with the cross-validation matrix `validate`, and
# collects the fits in the T14_7_* result lists that are written with saveRDS().
# -----------------------------------------------------------------------------
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")

message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# load() restores objects under the names they were saved with. `G` and
# `pheno_df_pla` are used below but never assigned in this script, so they
# must come from these .Rdata files (presumably G.Rdata / pheno_df_pla.Rdata).
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))

go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))     #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))               #7432
# The objects below: 7297 entries, excluding those go terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))

# ---- filter: keep GO terms with at least one marker, then take this job's slice
has_markers <- go_all_markers_number > 0
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

go_range <- 4001:5000
# Indexing past the end would silently yield NULL GRMs whose fits then fail
# one by one inside tryCatch(); stop early instead.
stopifnot(length(GF_filtered) >= max(go_range))
GF_filtered <- GF_filtered[go_range]
rGF_filtered <- rGF_filtered[go_range]

# ---- cross-validation setup
cycles <- 10
# Fold assignments are reused from the stand-alone GBLUP run rather than
# re-sampled here (the former set.seed()/sample() code produced a fold index
# that was never used and has been removed).
gblup_validate <- readRDS(
  "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds"
)
n_folds <- 8

T14_7_gblup_variances_all <- rep(list(list()), cycles)
T14_7_gblup_prediction_all <- rep(list(list()), cycles)
T14_7_gfblup_variances_all <- rep(list(list()), cycles)
T14_7_gfblup_prediction_all <- rep(list(list()), cycles)
T14_7_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T14_7
# Intercept-only design. The original formula `y ~ 1*y` placed the response on
# the right-hand side, which model.matrix() drops (with a warning), leaving
# just the intercept column.
X <- model.matrix(y ~ 1)
n <- length(y)

# Report a failed fit instead of discarding the error. The result slot keeps
# its pre-allocated empty list(), exactly as before.
report_fit_error <- function(label, e) {
  message("greml failed for ", label, ": ", conditionMessage(e))
  invisible(NULL)
}

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): unlist() flattens column-major while byrow = TRUE refills
  # row-major; if gblup_validate[[r]] holds an (N / n_folds) x n_folds matrix,
  # the columns built here are not the original folds. Confirm against the
  # script that wrote gblup_validate_all.rds before changing this.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T14_7...")

  # ---- GBLUP: both fits must succeed for either to be stored
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G),
                        validate = validate, ncores = 1)
    # `[r] <- list(.)` instead of `[[r]] <- .` so that a NULL result cannot
    # delete the slot and shift the results of later cycles.
    T14_7_gblup_variances_all[r] <- list(gblup_var)
    T14_7_gblup_prediction_all[r] <- list(gblup_pred)
  }, error = function(e) report_fit_error(paste0("GBLUP, cycle ", r), e))

  # ---- GFBLUP: one (feature, remainder) GRM pair per GO term
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)

  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      # The variance fit is kept even when the prediction fit below fails.
      gf_var[k] <- list(greml(y = y, X = X, GRM = grm_pair, ncores = 1))
      gf_pred[k] <- list(greml(y = y, X = X, GRM = grm_pair,
                               validate = validate, ncores = 1))
    }, error = function(e) {
      report_fit_error(paste0("GFBLUP, cycle ", r, ", GO# ", k), e)
    })
  }

  T14_7_gfblup_variances_all[[r]] <- gf_var
  T14_7_gfblup_prediction_all[[r]] <- gf_pred
  T14_7_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T14_7_gblup_variances_all, "T14_7_gblup_variances_all_4001_5000.rds")
saveRDS(T14_7_gblup_prediction_all, "T14_7_gblup_prediction_all_4001_5000.rds")
+saveRDS(T14_7_gfblup_variances_all,"T14_7_gfblup_variances_all_4001_5000.rds") +saveRDS(T14_7_gfblup_prediction_all,"T14_7_gfblup_prediction_all_4001_5000.rds") +saveRDS(T14_7_gfblup_validate_all,"T14_7_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_7_5001_6000.R b/code/using_GO/pla/T14_7_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..2a794aaad3490c54908cca89dad88b43c47ea20b --- /dev/null +++ b/code/using_GO/pla/T14_7_5001_6000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") 
# Remaining input objects for this run, read in the original order.
# .rds files hold one object each and are bound explicitly with `<-`;
# .Rdata files are restored by load() under whatever names they were saved with.
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO-term tables; the original annotation gives 7432 entries for these three.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# Per the original annotation: 7297 entries, i.e. GO terms with zero markers
# are already excluded from the objects below.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
#7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_7_gblup_variances_all=rep(list(list()),cycles) +T14_7_gblup_prediction_all=rep(list(list()),cycles) +T14_7_gfblup_variances_all=rep(list(list()),cycles) +T14_7_gfblup_prediction_all=rep(list(list()),cycles) +T14_7_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_7...") + y=pheno_df_pla$T14_7 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_7_gblup_variances_all[[r]]<-var + 
T14_7_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_7_gblup_variances_all[[r]]<-list() + T14_7_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_7_gfblup_variances_all[[r]]<-var + T14_7_gfblup_prediction_all[[r]]<-pred + T14_7_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_7_gblup_variances_all,"T14_7_gblup_variances_all_5001_6000.rds") +saveRDS(T14_7_gblup_prediction_all,"T14_7_gblup_prediction_all_5001_6000.rds") +saveRDS(T14_7_gfblup_variances_all,"T14_7_gfblup_variances_all_5001_6000.rds") +saveRDS(T14_7_gfblup_prediction_all,"T14_7_gfblup_prediction_all_5001_6000.rds") +saveRDS(T14_7_gfblup_validate_all,"T14_7_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_7_6001_7297.R b/code/using_GO/pla/T14_7_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..378da97ccd638196f31636d382c60a6e77ca988d --- /dev/null +++ b/code/using_GO/pla/T14_7_6001_7297.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- 
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") 
# GO-term tables; the original annotation gives 7432 entries for these three.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# Per the original annotation: 7297 entries, excluding those go terms with
# zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- filter ----
# Keep the GO terms that have at least one marker, look their GRMs up by name,
# then take this job's slice (positions 6001..7297 of the filtered lists).
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
kept_terms <- names(go_all_markers_filtered)
GF_filtered <- GF[kept_terms][6001:7297]
rGF_filtered <- rGF[kept_terms][6001:7297]

# Number of cross-validation repetitions.
# (disabled) gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
cycles <- 10
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_7_gblup_variances_all=rep(list(list()),cycles) +T14_7_gblup_prediction_all=rep(list(list()),cycles) +T14_7_gfblup_variances_all=rep(list(list()),cycles) +T14_7_gfblup_prediction_all=rep(list(list()),cycles) +T14_7_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_7...") + y=pheno_df_pla$T14_7 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_7_gblup_variances_all[[r]]<-var + T14_7_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_7_gblup_variances_all[[r]]<-list() + T14_7_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_7_gfblup_variances_all[[r]]<-var + T14_7_gfblup_prediction_all[[r]]<-pred + T14_7_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_7_gblup_variances_all,"T14_7_gblup_variances_all_6001_7297.rds") +saveRDS(T14_7_gblup_prediction_all,"T14_7_gblup_prediction_all_6001_7297.rds") +saveRDS(T14_7_gfblup_variances_all,"T14_7_gfblup_variances_all_6001_7297.rds") +saveRDS(T14_7_gfblup_prediction_all,"T14_7_gfblup_prediction_all_6001_7297.rds") +saveRDS(T14_7_gfblup_validate_all,"T14_7_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_8_1001_2000.R b/code/using_GO/pla/T14_8_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..2b248378bf3588b08fc739a78504e1504cfd6b55 --- /dev/null +++ b/code/using_GO/pla/T14_8_1001_2000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# 
# devtools::install_github("psoerensen/qgg")
#}
# (End of the disabled package-bootstrap code.)

# -----------------------------------------------------------------------------
# GBLUP vs. GFBLUP for phenotype column T14_8, GO-term GRMs 1001..2000.
#
# For each of `cycles` repetitions the script fits
#   * GBLUP : greml() with the single relationship matrix `G`
#   * GFBLUP: greml() with the pair (GF[k], rGF[k]) for every selected GO term
# each once without and once with the cross-validation matrix `validate`, and
# collects the fits in the T14_8_* result lists that are written with saveRDS().
# -----------------------------------------------------------------------------
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")

message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# load() restores objects under the names they were saved with. `G` and
# `pheno_df_pla` are used below but never assigned in this script, so they
# must come from these .Rdata files (presumably G.Rdata / pheno_df_pla.Rdata).
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))

go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))     #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))               #7432
# The objects below: 7297 entries, excluding those go terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))

# ---- filter: keep GO terms with at least one marker, then take this job's slice
has_markers <- go_all_markers_number > 0
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

go_range <- 1001:2000
# Indexing past the end would silently yield NULL GRMs whose fits then fail
# one by one inside tryCatch(); stop early instead.
stopifnot(length(GF_filtered) >= max(go_range))
GF_filtered <- GF_filtered[go_range]
rGF_filtered <- rGF_filtered[go_range]

# ---- cross-validation setup
cycles <- 10
# Fold assignments are reused from the stand-alone GBLUP run rather than
# re-sampled here (the former set.seed()/sample() code produced a fold index
# that was never used and has been removed).
gblup_validate <- readRDS(
  "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds"
)
n_folds <- 8

T14_8_gblup_variances_all <- rep(list(list()), cycles)
T14_8_gblup_prediction_all <- rep(list(list()), cycles)
T14_8_gfblup_variances_all <- rep(list(list()), cycles)
T14_8_gfblup_prediction_all <- rep(list(list()), cycles)
T14_8_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T14_8
# Intercept-only design. The original formula `y ~ 1*y` placed the response on
# the right-hand side, which model.matrix() drops (with a warning), leaving
# just the intercept column.
X <- model.matrix(y ~ 1)
n <- length(y)

# Report a failed fit instead of discarding the error. The result slot keeps
# its pre-allocated empty list(), exactly as before.
report_fit_error <- function(label, e) {
  message("greml failed for ", label, ": ", conditionMessage(e))
  invisible(NULL)
}

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): unlist() flattens column-major while byrow = TRUE refills
  # row-major; if gblup_validate[[r]] holds an (N / n_folds) x n_folds matrix,
  # the columns built here are not the original folds. Confirm against the
  # script that wrote gblup_validate_all.rds before changing this.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T14_8...")

  # ---- GBLUP: both fits must succeed for either to be stored
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G),
                        validate = validate, ncores = 1)
    # `[r] <- list(.)` instead of `[[r]] <- .` so that a NULL result cannot
    # delete the slot and shift the results of later cycles.
    T14_8_gblup_variances_all[r] <- list(gblup_var)
    T14_8_gblup_prediction_all[r] <- list(gblup_pred)
  }, error = function(e) report_fit_error(paste0("GBLUP, cycle ", r), e))

  # ---- GFBLUP: one (feature, remainder) GRM pair per GO term
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)

  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      # The variance fit is kept even when the prediction fit below fails.
      gf_var[k] <- list(greml(y = y, X = X, GRM = grm_pair, ncores = 1))
      gf_pred[k] <- list(greml(y = y, X = X, GRM = grm_pair,
                               validate = validate, ncores = 1))
    }, error = function(e) {
      report_fit_error(paste0("GFBLUP, cycle ", r, ", GO# ", k), e)
    })
  }

  T14_8_gfblup_variances_all[[r]] <- gf_var
  T14_8_gfblup_prediction_all[[r]] <- gf_pred
  T14_8_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T14_8_gblup_variances_all, "T14_8_gblup_variances_all_1001_2000.rds")
saveRDS(T14_8_gblup_prediction_all, "T14_8_gblup_prediction_all_1001_2000.rds")
saveRDS(T14_8_gfblup_variances_all, "T14_8_gfblup_variances_all_1001_2000.rds")
saveRDS(T14_8_gfblup_prediction_all, "T14_8_gfblup_prediction_all_1001_2000.rds")
+saveRDS(T14_8_gfblup_validate_all,"T14_8_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_8_1_1000.R b/code/using_GO/pla/T14_8_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..7c10b96bb1c9a8d736babf7dae01182ab3bb39a1 --- /dev/null +++ b/code/using_GO/pla/T14_8_1_1000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") 
# Remaining input objects for this run, read in the original order.
# .rds files hold one object each and are bound explicitly with `<-`;
# .Rdata files are restored by load() under whatever names they were saved with.
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO-term tables; the original annotation gives 7432 entries for these three.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# Per the original annotation: 7297 entries, excluding those go terms with
# zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- filter ----
# Count of GO terms that have at least one marker.
n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_8_gblup_variances_all=rep(list(list()),cycles) +T14_8_gblup_prediction_all=rep(list(list()),cycles) +T14_8_gfblup_variances_all=rep(list(list()),cycles) +T14_8_gfblup_prediction_all=rep(list(list()),cycles) +T14_8_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_8...") + y=pheno_df_pla$T14_8 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_8_gblup_variances_all[[r]]<-var + T14_8_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_8_gblup_variances_all[[r]]<-list() + T14_8_gblup_prediction_all[[r]]<-list() + }) #try catch ends + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_8_gfblup_variances_all[[r]]<-var + T14_8_gfblup_prediction_all[[r]]<-pred + T14_8_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_8_gblup_variances_all,"T14_8_gblup_variances_all_1_1000.rds") +saveRDS(T14_8_gblup_prediction_all,"T14_8_gblup_prediction_all_1_1000.rds") +saveRDS(T14_8_gfblup_variances_all,"T14_8_gfblup_variances_all_1_1000.rds") +saveRDS(T14_8_gfblup_prediction_all,"T14_8_gfblup_prediction_all_1_1000.rds") +saveRDS(T14_8_gfblup_validate_all,"T14_8_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_8_2001_3000.R b/code/using_GO/pla/T14_8_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..723ac35e11a964a74b8d4488e6975d16626295fc --- /dev/null +++ b/code/using_GO/pla/T14_8_2001_3000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, 
installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 
# ---- GO annotation inputs ----------------------------------------------------
# Size annotations carried over from the original script: go_all_markers covers
# 7432 GO terms; the remaining objects cover the 7297 terms left after
# excluding GO terms with zero markers.
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter ------------------------------------------------------------------
# Keep only GO terms that have at least one marker, then pick the matching
# entries of GF and rGF by GO-term name.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
n <- length(go_all_genes_number[go_all_markers_number > 0])

# This script processes positions 2001 to 3000 of the filtered GO terms.
GF_filtered <- GF[names(go_all_markers_filtered)][2001:3000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][2001:3000]

# ---- Cross-validation settings -----------------------------------------------
cycles <- 10
n_folds <- 8
# Validation sets are read from the GBLUP run's saved output, one entry per cycle.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")

# ---- Result containers: one (initially empty) list per cycle -----------------
T14_8_gblup_variances_all <- lapply(seq_len(cycles), function(i) list())
T14_8_gblup_prediction_all <- lapply(seq_len(cycles), function(i) list())
T14_8_gfblup_variances_all <- lapply(seq_len(cycles), function(i) list())
+T14_8_gfblup_prediction_all=rep(list(list()),cycles) +T14_8_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_8...") + y=pheno_df_pla$T14_8 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_8_gblup_variances_all[[r]]<-var + T14_8_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_8_gblup_variances_all[[r]]<-list() + T14_8_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_8_gfblup_variances_all[[r]]<-var + T14_8_gfblup_prediction_all[[r]]<-pred + 
T14_8_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_8_gblup_variances_all,"T14_8_gblup_variances_all_2001_3000.rds") +saveRDS(T14_8_gblup_prediction_all,"T14_8_gblup_prediction_all_2001_3000.rds") +saveRDS(T14_8_gfblup_variances_all,"T14_8_gfblup_variances_all_2001_3000.rds") +saveRDS(T14_8_gfblup_prediction_all,"T14_8_gfblup_prediction_all_2001_3000.rds") +saveRDS(T14_8_gfblup_validate_all,"T14_8_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_8_3001_4000.R b/code/using_GO/pla/T14_8_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..5d2d962e90a0f7340111b1a6feb828260207f5d4 --- /dev/null +++ b/code/using_GO/pla/T14_8_3001_4000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") 
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero 
markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_8_gblup_variances_all=rep(list(list()),cycles) +T14_8_gblup_prediction_all=rep(list(list()),cycles) +T14_8_gfblup_variances_all=rep(list(list()),cycles) +T14_8_gfblup_prediction_all=rep(list(list()),cycles) +T14_8_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_8...") + y=pheno_df_pla$T14_8 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
+ result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_8_gblup_variances_all[[r]]<-var + T14_8_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_8_gblup_variances_all[[r]]<-list() + T14_8_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_8_gfblup_variances_all[[r]]<-var + T14_8_gfblup_prediction_all[[r]]<-pred + T14_8_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_8_gblup_variances_all,"T14_8_gblup_variances_all_3001_4000.rds") +saveRDS(T14_8_gblup_prediction_all,"T14_8_gblup_prediction_all_3001_4000.rds") +saveRDS(T14_8_gfblup_variances_all,"T14_8_gfblup_variances_all_3001_4000.rds") +saveRDS(T14_8_gfblup_prediction_all,"T14_8_gfblup_prediction_all_3001_4000.rds") +saveRDS(T14_8_gfblup_validate_all,"T14_8_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_8_4001_5000.R b/code/using_GO/pla/T14_8_4001_5000.R new file mode 100644 index 
0000000000000000000000000000000000000000..000023127afae0ef854df9719f1ebcd1741fbc8d --- /dev/null +++ b/code/using_GO/pla/T14_8_4001_5000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
# ---- Phenotype workspaces ----------------------------------------------------
# load() restores whatever objects were saved in each .Rdata file into the
# current environment; pheno_df_pla (used further down the script) is presumably
# among them -- confirm against the .Rdata contents.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO annotation inputs ----------------------------------------------------
# Size annotations carried over from the original script: the three go_all_*
# objects cover 7432 GO terms; the remaining objects cover the 7297 terms left
# after excluding GO terms with zero markers.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter ------------------------------------------------------------------
# Keep only GO terms that have at least one marker, then pick the matching
# entries of GF and rGF by GO-term name.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
n <- length(go_all_genes_number[go_all_markers_number > 0])

# This script processes positions 4001 to 5000 of the filtered GO terms.
GF_filtered <- GF[names(go_all_markers_filtered)][4001:5000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][4001:5000]
+########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T14_8_gblup_variances_all=rep(list(list()),cycles) +T14_8_gblup_prediction_all=rep(list(list()),cycles) +T14_8_gfblup_variances_all=rep(list(list()),cycles) +T14_8_gfblup_prediction_all=rep(list(list()),cycles) +T14_8_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T14_8...") + y=pheno_df_pla$T14_8 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T14_8_gblup_variances_all[[r]]<-var + T14_8_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T14_8_gblup_variances_all[[r]]<-list() + T14_8_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { 
+ message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T14_8_gfblup_variances_all[[r]]<-var + T14_8_gfblup_prediction_all[[r]]<-pred + T14_8_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T14_8_gblup_variances_all,"T14_8_gblup_variances_all_4001_5000.rds") +saveRDS(T14_8_gblup_prediction_all,"T14_8_gblup_prediction_all_4001_5000.rds") +saveRDS(T14_8_gfblup_variances_all,"T14_8_gfblup_variances_all_4001_5000.rds") +saveRDS(T14_8_gfblup_prediction_all,"T14_8_gfblup_prediction_all_4001_5000.rds") +saveRDS(T14_8_gfblup_validate_all,"T14_8_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_8_5001_6000.R b/code/using_GO/pla/T14_8_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..ff599fc4b7f09ff251b5870f8b482631f55e1367 --- /dev/null +++ b/code/using_GO/pla/T14_8_5001_6000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if 
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers 
# ---- Remaining GO annotation inputs ------------------------------------------
# Per the original annotations these objects cover the 7297 GO terms left after
# excluding GO terms with zero markers.
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter ------------------------------------------------------------------
# Keep only GO terms that have at least one marker, then pick the matching
# entries of GF and rGF by GO-term name.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
# This script processes positions 5001 to 6000 of the filtered GO terms.
GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]

# ---- Cross-validation settings -----------------------------------------------
cycles <- 10
# Validation sets are read from the GBLUP run's saved output (one entry per
# cycle) rather than re-sampled here. The original script also re-seeded the
# RNG from the clock and sampled a fold index that was never used; that dead
# code is removed.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) result slot per cycle.
T14_8_gblup_variances_all <- rep(list(list()), cycles)
T14_8_gblup_prediction_all <- rep(list(list()), cycles)
T14_8_gfblup_variances_all <- rep(list(list()), cycles)
T14_8_gfblup_prediction_all <- rep(list(list()), cycles)
T14_8_gfblup_validate_all <- rep(list(list()), cycles)

# Fit one model twice with greml(): once without and once with the validation
# sets. Returns list(variances = <fit>, prediction = <fit with validate>).
# If either fit errors, the error is logged and BOTH elements come back as
# empty lists, so a failed model never leaves a half-filled result.
# (The original handlers assigned inside the handler closure, which never
# reached the script's variables.)
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = GRM, ncores = 1),
      prediction = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message(label, " failed: ", conditionMessage(e))
      list(variances = list(), prediction = list())
    }
  )
}

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T14_8
# NOTE(review): the formula puts the response on its own right-hand side;
# presumably an intercept-only design (y ~ 1) is intended -- confirm. Left
# unchanged to keep the design matrix exactly as before.
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Reshape this cycle's saved validation indices into a matrix with
  # round(n_obs / n_folds) rows, filled row by row.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T14_8...")

  # ---- GBLUP: single relationship matrix G ----
  gblup_fit <- fit_greml_pair(
    y, X, list(G = G), validate,
    paste0("GBLUP (cycle ", r, ")")
  )
  T14_8_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
  T14_8_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]

  # ---- GFBLUP: per GO term, the k-th GF entry plus the k-th rGF entry ----
  gf_variances <- rep(list(list()), length(GF_filtered))
  gf_predictions <- rep(list(list()), length(GF_filtered))
  names(gf_variances) <- names(GF_filtered)
  names(gf_predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      paste0("GFBLUP (cycle ", r, ", GO# ", k, ")")
    )
    gf_variances[[k]] <- gf_fit[["variances"]]
    gf_predictions[[k]] <- gf_fit[["prediction"]]
  }
  T14_8_gfblup_variances_all[[r]] <- gf_variances
  T14_8_gfblup_prediction_all[[r]] <- gf_predictions
  T14_8_gfblup_validate_all[[r]] <- list(validate)
}

# Output paths are relative to the working directory set earlier in the script.
saveRDS(T14_8_gblup_variances_all, "T14_8_gblup_variances_all_5001_6000.rds")
saveRDS(T14_8_gblup_prediction_all, "T14_8_gblup_prediction_all_5001_6000.rds")
+saveRDS(T14_8_gfblup_variances_all,"T14_8_gfblup_variances_all_5001_6000.rds") +saveRDS(T14_8_gfblup_prediction_all,"T14_8_gfblup_prediction_all_5001_6000.rds") +saveRDS(T14_8_gfblup_validate_all,"T14_8_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T14_8_6001_7297.R b/code/using_GO/pla/T14_8_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..3a0790d2c95ffbf868f6e7f95ed421aa0ec68587 --- /dev/null +++ b/code/using_GO/pla/T14_8_6001_7297.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") 
# ---- Genotype / annotation inputs --------------------------------------------
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores whatever objects were saved in the .Rdata file into the
# current environment; G (used by the GBLUP model later in the script) is
# presumably among them -- confirm against the .Rdata contents.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")

# ---- Phenotype workspaces ----------------------------------------------------
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO annotation inputs ----------------------------------------------------
# Size annotations carried over from the original script: the three go_all_*
# objects cover 7432 GO terms; markerSets, setsGF, rsetsGF, nsets, nsnps and GF
# cover the 7297 terms left after excluding GO terms with zero markers.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
#7297 excluding those go terms with zero markers
########################################################################################
#filter
# Keep only GO terms that have at least one marker, then restrict this script
# to the GO terms at positions 6001-7297 of the filtered lists.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[6001:7297]
rGF_filtered <- rGF_filtered[6001:7297]
###########################################################################################
cycles <- 10
# Validation sets saved by the GBLUP run; one entry is read per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) slot per cycle; a slot stays list() when a fit fails.
T14_8_gblup_variances_all <- rep(list(list()), cycles)
T14_8_gblup_prediction_all <- rep(list(list()), cycles)
T14_8_gfblup_variances_all <- rep(list(list()), cycles)
T14_8_gfblup_prediction_all <- rep(list(list()), cycles)
T14_8_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The validation sets come from the stored GBLUP run, so the former
  # time-seeded fold sampling (whose result was never used) has been removed.
  # NOTE(review): unlist() followed by byrow = TRUE only reproduces the stored
  # matrix if it was flattened row-wise; confirm against the script that wrote
  # gblup_validate_all.rds.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla) / n_folds),
                     byrow = TRUE)

  message("T14_8...")
  y <- pheno_df_pla$T14_8
  # NOTE(review): the response also appears on the right-hand side of this
  # formula; confirm that this design matrix is what is intended.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)

  # ---- GBLUP: single genomic relationship matrix G ----
  # Results are stored only when both fits succeed. On error the slots keep
  # their list() placeholders and the failure is reported instead of being
  # silently dropped (assignments inside a handler function would only modify
  # the handler's local copies).
  tryCatch({
    var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    T14_8_gblup_variances_all[[r]] <- var
    T14_8_gblup_prediction_all[[r]] <- pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP: one model per GO term, GRM = c(GF[k], rGF[k]) ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # Fit both models first and commit them together, so a failure of either
    # fit leaves var[[k]] and pred[[k]] both as empty lists.
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO#", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    if (!is.null(fit_k)) {
      var[[k]] <- fit_k[["var"]]
      pred[[k]] <- fit_k[["pred"]]
    }
  }
  T14_8_gfblup_variances_all[[r]] <- var
  T14_8_gfblup_prediction_all[[r]] <- pred
  T14_8_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T14_8_gblup_variances_all, "T14_8_gblup_variances_all_6001_7297.rds")
saveRDS(T14_8_gblup_prediction_all, "T14_8_gblup_prediction_all_6001_7297.rds")
saveRDS(T14_8_gfblup_variances_all, "T14_8_gfblup_variances_all_6001_7297.rds")
saveRDS(T14_8_gfblup_prediction_all, "T14_8_gfblup_prediction_all_6001_7297.rds")
saveRDS(T14_8_gfblup_validate_all, "T14_8_gfblup_validate_all_6001_7297.rds")

##################################################################################################################################
# Patch metadata retained from the flattened git diff:
# diff --git a/code/using_GO/pla/T15_1_1001_2000.R b/code/using_GO/pla/T15_1_1001_2000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..39066777dded5a36f535d8c198a3b35254c82d20
# --- /dev/null
# +++ b/code/using_GO/pla/T15_1_1001_2000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace.
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg)
{
  is.element(mypkg, installed.packages()[,1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term inputs (the original annotates the first three with "7432").
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Keep only GO terms that have at least one marker, then restrict this script
# to the GO terms at positions 1001-2000 of the filtered lists.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1001:2000]
rGF_filtered <- rGF_filtered[1001:2000]

###########################################################################################
cycles <- 10
# Validation sets saved by the GBLUP run; one entry is read per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) slot per cycle; a slot stays list() when a fit fails.
T15_1_gblup_variances_all <- rep(list(list()), cycles)
T15_1_gblup_prediction_all <- rep(list(list()), cycles)
T15_1_gfblup_variances_all <- rep(list(list()), cycles)
T15_1_gfblup_prediction_all <- rep(list(list()), cycles)
T15_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The validation sets come from the stored GBLUP run, so the former
  # time-seeded fold sampling (whose result was never used) has been removed.
  # NOTE(review): unlist() followed by byrow = TRUE only reproduces the stored
  # matrix if it was flattened row-wise; confirm against the script that wrote
  # gblup_validate_all.rds.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla) / n_folds),
                     byrow = TRUE)

  message("T15_1...")
  y <- pheno_df_pla$T15_1
  # NOTE(review): the response also appears on the right-hand side of this
  # formula; confirm that this design matrix is what is intended.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)

  # ---- GBLUP: single genomic relationship matrix G ----
  # Results are stored only when both fits succeed. On error the slots keep
  # their list() placeholders and the failure is reported instead of being
  # silently dropped (assignments inside a handler function would only modify
  # the handler's local copies).
  tryCatch({
    var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    T15_1_gblup_variances_all[[r]] <- var
    T15_1_gblup_prediction_all[[r]] <- pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP: one model per GO term, GRM = c(GF[k], rGF[k]) ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # Fit both models first and commit them together, so a failure of either
    # fit leaves var[[k]] and pred[[k]] both as empty lists.
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO#", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    if (!is.null(fit_k)) {
      var[[k]] <- fit_k[["var"]]
      pred[[k]] <- fit_k[["pred"]]
    }
  }
  T15_1_gfblup_variances_all[[r]] <- var
  T15_1_gfblup_prediction_all[[r]] <- pred
  T15_1_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T15_1_gblup_variances_all, "T15_1_gblup_variances_all_1001_2000.rds")
saveRDS(T15_1_gblup_prediction_all, "T15_1_gblup_prediction_all_1001_2000.rds")
saveRDS(T15_1_gfblup_variances_all, "T15_1_gfblup_variances_all_1001_2000.rds")
saveRDS(T15_1_gfblup_prediction_all, "T15_1_gfblup_prediction_all_1001_2000.rds")
saveRDS(T15_1_gfblup_validate_all, "T15_1_gfblup_validate_all_1001_2000.rds")

##################################################################################################################################
# Patch metadata retained from the flattened git diff:
# diff --git a/code/using_GO/pla/T15_1_1_1000.R b/code/using_GO/pla/T15_1_1_1000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..461709bb078b19b4e99e2a05f5055281f946c4af
# --- /dev/null
# +++ b/code/using_GO/pla/T15_1_1_1000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace.
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg)
{
  is.element(mypkg, installed.packages()[,1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
# Genotype / phenotype inputs; load() restores the objects stored in each .Rdata file.
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term inputs (the original annotates the first three with "7432").
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Keep only GO terms that have at least one marker, then restrict this script
# to the GO terms at positions 1-1000 of the filtered lists.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]

###########################################################################################
cycles <- 10
# Validation sets saved by the GBLUP run; one entry is read per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) slot per cycle; a slot stays list() when a fit fails.
T15_1_gblup_variances_all <- rep(list(list()), cycles)
T15_1_gblup_prediction_all <- rep(list(list()), cycles)
T15_1_gfblup_variances_all <- rep(list(list()), cycles)
T15_1_gfblup_prediction_all <- rep(list(list()), cycles)
T15_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The validation sets come from the stored GBLUP run, so the former
  # time-seeded fold sampling (whose result was never used) has been removed.
  # NOTE(review): unlist() followed by byrow = TRUE only reproduces the stored
  # matrix if it was flattened row-wise; confirm against the script that wrote
  # gblup_validate_all.rds.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla) / n_folds),
                     byrow = TRUE)

  message("T15_1...")
  y <- pheno_df_pla$T15_1
  # NOTE(review): the response also appears on the right-hand side of this
  # formula; confirm that this design matrix is what is intended.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)

  # ---- GBLUP: single genomic relationship matrix G ----
  # Results are stored only when both fits succeed. On error the slots keep
  # their list() placeholders and the failure is reported instead of being
  # silently dropped (assignments inside a handler function would only modify
  # the handler's local copies).
  tryCatch({
    var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    T15_1_gblup_variances_all[[r]] <- var
    T15_1_gblup_prediction_all[[r]] <- pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP: one model per GO term, GRM = c(GF[k], rGF[k]) ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # Fit both models first and commit them together, so a failure of either
    # fit leaves var[[k]] and pred[[k]] both as empty lists.
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO#", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    if (!is.null(fit_k)) {
      var[[k]] <- fit_k[["var"]]
      pred[[k]] <- fit_k[["pred"]]
    }
  }
  T15_1_gfblup_variances_all[[r]] <- var
  T15_1_gfblup_prediction_all[[r]] <- pred
  T15_1_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T15_1_gblup_variances_all, "T15_1_gblup_variances_all_1_1000.rds")
saveRDS(T15_1_gblup_prediction_all, "T15_1_gblup_prediction_all_1_1000.rds")
saveRDS(T15_1_gfblup_variances_all, "T15_1_gfblup_variances_all_1_1000.rds")
saveRDS(T15_1_gfblup_prediction_all, "T15_1_gfblup_prediction_all_1_1000.rds")
saveRDS(T15_1_gfblup_validate_all, "T15_1_gfblup_validate_all_1_1000.rds")

##################################################################################################################################
# Patch metadata retained from the flattened git diff:
# diff --git a/code/using_GO/pla/T15_1_2001_3000.R b/code/using_GO/pla/T15_1_2001_3000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..ef4eac42a690dcc2e735ea7047d5a98e9cb9f120
# --- /dev/null
# +++ b/code/using_GO/pla/T15_1_2001_3000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) {
  gsub(pattern = "^\\s+|\\s+$", replacement = "", x = x)
}

# Negated membership operator: TRUE where %in% would be FALSE.
`%not_in%` <- purrr::negate(`%in%`)

# TRUE when `mypkg` appears in the first column of installed.packages().
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

# All relative output paths below resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Optional one-off installation, left disabled:
#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
message("Loading data...")
# Genotype / phenotype inputs. load() restores whatever objects the .Rdata
# files contain, so the statement order is kept exactly as in the original.
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term inputs (the original annotates the first three with "7432").
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Keep only GO terms with at least one marker.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
# This script handles only the GO terms at positions 2001-3000 of the filtered lists.
GF_filtered <- GF_filtered[2001:3000]
rGF_filtered <- rGF_filtered[2001:3000]

###########################################################################################
cycles <- 10
# Validation sets saved by the GBLUP run; one entry is read per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) slot per cycle; a slot stays list() when a fit fails.
T15_1_gblup_variances_all <- rep(list(list()), cycles)
T15_1_gblup_prediction_all <- rep(list(list()), cycles)
T15_1_gfblup_variances_all <- rep(list(list()), cycles)
T15_1_gfblup_prediction_all <- rep(list(list()), cycles)
T15_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The validation sets come from the stored GBLUP run, so the former
  # time-seeded fold sampling (whose result was never used) has been removed.
  # NOTE(review): unlist() followed by byrow = TRUE only reproduces the stored
  # matrix if it was flattened row-wise; confirm against the script that wrote
  # gblup_validate_all.rds.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla) / n_folds),
                     byrow = TRUE)

  message("T15_1...")
  y <- pheno_df_pla$T15_1
  # NOTE(review): the response also appears on the right-hand side of this
  # formula; confirm that this design matrix is what is intended.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)

  # ---- GBLUP: single genomic relationship matrix G ----
  # Results are stored only when both fits succeed. On error the slots keep
  # their list() placeholders and the failure is reported instead of being
  # silently dropped (assignments inside a handler function would only modify
  # the handler's local copies).
  tryCatch({
    var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    T15_1_gblup_variances_all[[r]] <- var
    T15_1_gblup_prediction_all[[r]] <- pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP: one model per GO term, GRM = c(GF[k], rGF[k]) ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # Fit both models first and commit them together, so a failure of either
    # fit leaves var[[k]] and pred[[k]] both as empty lists.
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO#", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    if (!is.null(fit_k)) {
      var[[k]] <- fit_k[["var"]]
      pred[[k]] <- fit_k[["pred"]]
    }
  }
  T15_1_gfblup_variances_all[[r]] <- var
  T15_1_gfblup_prediction_all[[r]] <- pred
  T15_1_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T15_1_gblup_variances_all, "T15_1_gblup_variances_all_2001_3000.rds")
saveRDS(T15_1_gblup_prediction_all, "T15_1_gblup_prediction_all_2001_3000.rds")
saveRDS(T15_1_gfblup_variances_all, "T15_1_gfblup_variances_all_2001_3000.rds")
saveRDS(T15_1_gfblup_prediction_all, "T15_1_gfblup_prediction_all_2001_3000.rds")
saveRDS(T15_1_gfblup_validate_all, "T15_1_gfblup_validate_all_2001_3000.rds")

##################################################################################################################################
# Patch metadata retained from the flattened git diff:
# diff --git a/code/using_GO/pla/T15_1_3001_4000.R b/code/using_GO/pla/T15_1_3001_4000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..3a101f1d0a9fd4f60bd034570f65e6be9be24038
# --- /dev/null
# +++ b/code/using_GO/pla/T15_1_3001_4000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace.
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg)
{
  is.element(mypkg, installed.packages()[,1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
# Genotype / phenotype inputs; load() restores the objects stored in each .Rdata file.
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term inputs (the original annotates the first three with "7432").
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Keep only GO terms that have at least one marker, then restrict this script
# to the GO terms at positions 3001-4000 of the filtered lists.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]

###########################################################################################
cycles <- 10
# Validation sets saved by the GBLUP run; one entry is read per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) slot per cycle; a slot stays list() when a fit fails.
T15_1_gblup_variances_all <- rep(list(list()), cycles)
T15_1_gblup_prediction_all <- rep(list(list()), cycles)
T15_1_gfblup_variances_all <- rep(list(list()), cycles)
T15_1_gfblup_prediction_all <- rep(list(list()), cycles)
T15_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The validation sets come from the stored GBLUP run, so the former
  # time-seeded fold sampling (whose result was never used) has been removed.
  # NOTE(review): unlist() followed by byrow = TRUE only reproduces the stored
  # matrix if it was flattened row-wise; confirm against the script that wrote
  # gblup_validate_all.rds.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla) / n_folds),
                     byrow = TRUE)

  message("T15_1...")
  y <- pheno_df_pla$T15_1
  # NOTE(review): the response also appears on the right-hand side of this
  # formula; confirm that this design matrix is what is intended.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)

  # ---- GBLUP: single genomic relationship matrix G ----
  # Results are stored only when both fits succeed. On error the slots keep
  # their list() placeholders and the failure is reported instead of being
  # silently dropped (assignments inside a handler function would only modify
  # the handler's local copies).
  tryCatch({
    var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    T15_1_gblup_variances_all[[r]] <- var
    T15_1_gblup_prediction_all[[r]] <- pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP: one model per GO term, GRM = c(GF[k], rGF[k]) ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # Fit both models first and commit them together, so a failure of either
    # fit leaves var[[k]] and pred[[k]] both as empty lists.
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO#", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    if (!is.null(fit_k)) {
      var[[k]] <- fit_k[["var"]]
      pred[[k]] <- fit_k[["pred"]]
    }
  }
  T15_1_gfblup_variances_all[[r]] <- var
  T15_1_gfblup_prediction_all[[r]] <- pred
  T15_1_gfblup_validate_all[[r]] <- list(validate)
}
# Persist the per-cycle results of the preceding script (trait T15_1, GO terms 3001-4000).
saveRDS(T15_1_gblup_variances_all, file = "T15_1_gblup_variances_all_3001_4000.rds")
saveRDS(T15_1_gblup_prediction_all, file = "T15_1_gblup_prediction_all_3001_4000.rds")
saveRDS(T15_1_gfblup_variances_all, file = "T15_1_gfblup_variances_all_3001_4000.rds")
saveRDS(T15_1_gfblup_prediction_all, file = "T15_1_gfblup_prediction_all_3001_4000.rds")
saveRDS(T15_1_gfblup_validate_all, file = "T15_1_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
# Patch metadata retained from the flattened git diff:
# diff --git a/code/using_GO/pla/T15_1_4001_5000.R b/code/using_GO/pla/T15_1_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..8258197e4e77441eca0d54dd162ead377d37e93d
# --- /dev/null
# +++ b/code/using_GO/pla/T15_1_4001_5000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) {
  gsub(pattern = "^\\s+|\\s+$", replacement = "", x = x)
}

# Negated membership operator: TRUE where %in% would be FALSE.
`%not_in%` <- purrr::negate(`%in%`)

# TRUE when `mypkg` appears in the first column of installed.packages().
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

# All relative output paths below resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Optional one-off installation, left disabled:
#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+# Input locations shared by the remaining reads.
+data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
+go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
+
+accessions <- readRDS(file.path(data_dir, "accessions.rds"))
+all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
+ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
+W <- readRDS(file.path(data_dir, "W.rds"))
+# load() restores objects under their saved names; `G` and `pheno_df_pla`, used
+# below, are presumably provided by these files -- TODO confirm.
+load(file.path(data_dir, "G.Rdata"))
+MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
+load(file.path(data_dir, "Pheno_pla.Rdata"))
+load(file.path(data_dir, "geno_pheno_pla.Rdata"))
+load(file.path(data_dir, "pheno_df_pla.Rdata"))
+go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
+go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
+go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
+# The objects below are annotated "7297 excluding those go terms with zero markers".
+markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
+setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
+rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
+nsets <- readRDS(file.path(go_dir, "nsets.rds"))
+nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
+GF <- readRDS(file.path(go_dir, "GF.rds"))
+rGF <- readRDS(file.path(go_dir, "rGF.rds"))
+
+# ---- Filter ----
+# Keep GO terms with at least one marker, then take this script's slice.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)][4001:5000]
+rGF_filtered <- rGF[names(go_all_markers_filtered)][4001:5000]
+
+# ---- Cross-validation setup ----
+cycles <- 10
+n_folds <- 8
+# Validation sets are read from the earlier GBLUP run instead of being
+# re-sampled, so no random fold generation happens in this script.
+gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+
+T15_1_gblup_variances_all <- rep(list(list()), cycles)
+T15_1_gblup_prediction_all <- rep(list(list()), cycles)
+T15_1_gfblup_variances_all <- rep(list(list()), cycles)
+T15_1_gfblup_prediction_all <- rep(list(list()), cycles)
+T15_1_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and fixed-effect design matrix do not change between cycles.
+y <- pheno_df_pla$T15_1
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  message("T15_1...")
+
+  # GBLUP. The handler RETURNS the fallback: assignments made inside a handler
+  # function only affect its own local copies, never the outer result lists.
+  gblup_fit <- tryCatch(
+    list(
+      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    ),
+    error = function(e) {
+      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    }
+  )
+  T15_1_gblup_variances_all[[r]] <- gblup_fit$var
+  T15_1_gblup_prediction_all[[r]] <- gblup_fit$pred
+
+  # GFBLUP: one fit per GO term, pairing GF_filtered[k] with rGF_filtered[k]
+  # (presumably the GO-term GRM and its complement -- TODO confirm).
+  gf_var <- rep(list(list()), length(GF_filtered))
+  gf_pred <- rep(list(list()), length(GF_filtered))
+  names(gf_var) <- names(GF_filtered)
+  names(gf_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
+    # Both entries fall back to list() if either fit fails, so a variance fit
+    # is never kept without its matching prediction.
+    gf_fit <- tryCatch(
+      list(
+        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
+        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
+      ),
+      error = function(e) {
+        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+        list(var = list(), pred = list())
+      }
+    )
+    gf_var[[k]] <- gf_fit$var
+    gf_pred[[k]] <- gf_fit$pred
+  }
+  T15_1_gfblup_variances_all[[r]] <- gf_var
+  T15_1_gfblup_prediction_all[[r]] <- gf_pred
+  T15_1_gfblup_validate_all[[r]] <- list(validate)
+}
+saveRDS(T15_1_gblup_variances_all, "T15_1_gblup_variances_all_4001_5000.rds")
+saveRDS(T15_1_gblup_prediction_all, "T15_1_gblup_prediction_all_4001_5000.rds")
+saveRDS(T15_1_gfblup_variances_all, "T15_1_gfblup_variances_all_4001_5000.rds")
+saveRDS(T15_1_gfblup_prediction_all, "T15_1_gfblup_prediction_all_4001_5000.rds")
+saveRDS(T15_1_gfblup_validate_all, "T15_1_gfblup_validate_all_4001_5000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T15_1_5001_6000.R b/code/using_GO/pla/T15_1_5001_6000.R
new file mode 100644
index 0000000000000000000000000000000000000000..d5998cfd5d5c2ce3566b9417b43933847e96e794
--- /dev/null
+++ b/code/using_GO/pla/T15_1_5001_6000.R
@@ -0,0 +1,131 @@
+# ---- Helper functions ----
+# Strip leading and trailing whitespace.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negated form of %in%.
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether package `mypkg` is installed.
+is.installed <- function(mypkg) {
+  mypkg %in% installed.packages()[, 1]
+}
+
+# Output files are written relative to this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+# One-off installation steps, kept for reference:
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+
+# ---- Input data ----
+message("Loading data...")
+MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+# load() restores objects under the names they were saved with.
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+# load() restores objects under their saved names; `pheno_df_pla`, used below,
+# is presumably provided by this file -- TODO confirm.
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
+go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
+go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
+go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
+# The objects below are annotated "7297 excluding those go terms with zero markers".
+markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
+setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
+rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
+nsets <- readRDS(file.path(go_dir, "nsets.rds"))
+nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
+GF <- readRDS(file.path(go_dir, "GF.rds"))
+rGF <- readRDS(file.path(go_dir, "rGF.rds"))
+
+# ---- Filter ----
+# Keep GO terms with at least one marker, then take this script's slice.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)][5001:6000]
+rGF_filtered <- rGF[names(go_all_markers_filtered)][5001:6000]
+
+# ---- Cross-validation setup ----
+cycles <- 10
+n_folds <- 8
+# Validation sets are read from the earlier GBLUP run instead of being
+# re-sampled, so no random fold generation happens in this script.
+gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+
+T15_1_gblup_variances_all <- rep(list(list()), cycles)
+T15_1_gblup_prediction_all <- rep(list(list()), cycles)
+T15_1_gfblup_variances_all <- rep(list(list()), cycles)
+T15_1_gfblup_prediction_all <- rep(list(list()), cycles)
+T15_1_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and fixed-effect design matrix do not change between cycles.
+y <- pheno_df_pla$T15_1
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  message("T15_1...")
+
+  # GBLUP. The handler RETURNS the fallback: assignments made inside a handler
+  # function only affect its own local copies, never the outer result lists.
+  gblup_fit <- tryCatch(
+    list(
+      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    ),
+    error = function(e) {
+      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    }
+  )
+  T15_1_gblup_variances_all[[r]] <- gblup_fit$var
+  T15_1_gblup_prediction_all[[r]] <- gblup_fit$pred
+
+  # GFBLUP: one fit per GO term, pairing GF_filtered[k] with rGF_filtered[k]
+  # (presumably the GO-term GRM and its complement -- TODO confirm).
+  gf_var <- rep(list(list()), length(GF_filtered))
+  gf_pred <- rep(list(list()), length(GF_filtered))
+  names(gf_var) <- names(GF_filtered)
+  names(gf_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
+    # Both entries fall back to list() if either fit fails, so a variance fit
+    # is never kept without its matching prediction.
+    gf_fit <- tryCatch(
+      list(
+        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
+        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
+      ),
+      error = function(e) {
+        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+        list(var = list(), pred = list())
+      }
+    )
+    gf_var[[k]] <- gf_fit$var
+    gf_pred[[k]] <- gf_fit$pred
+  }
+  T15_1_gfblup_variances_all[[r]] <- gf_var
+  T15_1_gfblup_prediction_all[[r]] <- gf_pred
+  T15_1_gfblup_validate_all[[r]] <- list(validate)
+}
+saveRDS(T15_1_gblup_variances_all, "T15_1_gblup_variances_all_5001_6000.rds")
+saveRDS(T15_1_gblup_prediction_all, "T15_1_gblup_prediction_all_5001_6000.rds")
+saveRDS(T15_1_gfblup_variances_all, "T15_1_gfblup_variances_all_5001_6000.rds")
+saveRDS(T15_1_gfblup_prediction_all, "T15_1_gfblup_prediction_all_5001_6000.rds")
+saveRDS(T15_1_gfblup_validate_all, "T15_1_gfblup_validate_all_5001_6000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T15_1_6001_7297.R b/code/using_GO/pla/T15_1_6001_7297.R
new file mode 100644
index 0000000000000000000000000000000000000000..27cae6a4f8d7fd42d5ef95c8cd3c26a5d8afd939
--- /dev/null
+++ b/code/using_GO/pla/T15_1_6001_7297.R
@@ -0,0 +1,131 @@
+# ---- Helper functions ----
+# Strip leading and trailing whitespace.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negated form of %in%.
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether package `mypkg` is installed.
+is.installed <- function(mypkg) {
+  mypkg %in% installed.packages()[, 1]
+}
+
+# Output files are written relative to this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+# One-off installation steps, kept for reference:
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+
+# ---- Input data ----
+message("Loading data...")
+data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
+go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
+MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
+accessions <- readRDS(file.path(data_dir, "accessions.rds"))
+all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
+ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
+W <- readRDS(file.path(data_dir, "W.rds"))
+# load() restores objects under their saved names; `G` and `pheno_df_pla`, used
+# below, are presumably provided by these files -- TODO confirm.
+load(file.path(data_dir, "G.Rdata"))
+MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
+load(file.path(data_dir, "Pheno_pla.Rdata"))
+load(file.path(data_dir, "geno_pheno_pla.Rdata"))
+load(file.path(data_dir, "pheno_df_pla.Rdata"))
+go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
+go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
+go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
+# The objects below are annotated "7297 excluding those go terms with zero markers".
+markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
+setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
+rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
+nsets <- readRDS(file.path(go_dir, "nsets.rds"))
+nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
+GF <- readRDS(file.path(go_dir, "GF.rds"))
+rGF <- readRDS(file.path(go_dir, "rGF.rds"))
+
+# ---- Filter ----
+# Keep GO terms with at least one marker, then take this script's slice.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)][6001:7297]
+rGF_filtered <- rGF[names(go_all_markers_filtered)][6001:7297]
+
+# ---- Cross-validation setup ----
+cycles <- 10
+n_folds <- 8
+# Validation sets are read from the earlier GBLUP run instead of being
+# re-sampled, so no random fold generation happens in this script.
+gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+
+T15_1_gblup_variances_all <- rep(list(list()), cycles)
+T15_1_gblup_prediction_all <- rep(list(list()), cycles)
+T15_1_gfblup_variances_all <- rep(list(list()), cycles)
+T15_1_gfblup_prediction_all <- rep(list(list()), cycles)
+T15_1_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and fixed-effect design matrix do not change between cycles.
+y <- pheno_df_pla$T15_1
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  message("T15_1...")
+
+  # GBLUP. The handler RETURNS the fallback: assignments made inside a handler
+  # function only affect its own local copies, never the outer result lists.
+  gblup_fit <- tryCatch(
+    list(
+      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    ),
+    error = function(e) {
+      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    }
+  )
+  T15_1_gblup_variances_all[[r]] <- gblup_fit$var
+  T15_1_gblup_prediction_all[[r]] <- gblup_fit$pred
+
+  # GFBLUP: one fit per GO term, pairing GF_filtered[k] with rGF_filtered[k]
+  # (presumably the GO-term GRM and its complement -- TODO confirm).
+  gf_var <- rep(list(list()), length(GF_filtered))
+  gf_pred <- rep(list(list()), length(GF_filtered))
+  names(gf_var) <- names(GF_filtered)
+  names(gf_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
+    # Both entries fall back to list() if either fit fails, so a variance fit
+    # is never kept without its matching prediction.
+    gf_fit <- tryCatch(
+      list(
+        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
+        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
+      ),
+      error = function(e) {
+        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+        list(var = list(), pred = list())
+      }
+    )
+    gf_var[[k]] <- gf_fit$var
+    gf_pred[[k]] <- gf_fit$pred
+  }
+  T15_1_gfblup_variances_all[[r]] <- gf_var
+  T15_1_gfblup_prediction_all[[r]] <- gf_pred
+  T15_1_gfblup_validate_all[[r]] <- list(validate)
+}
+saveRDS(T15_1_gblup_variances_all, "T15_1_gblup_variances_all_6001_7297.rds")
+saveRDS(T15_1_gblup_prediction_all, "T15_1_gblup_prediction_all_6001_7297.rds")
+saveRDS(T15_1_gfblup_variances_all, "T15_1_gfblup_variances_all_6001_7297.rds")
+saveRDS(T15_1_gfblup_prediction_all, "T15_1_gfblup_prediction_all_6001_7297.rds")
+saveRDS(T15_1_gfblup_validate_all, file = "T15_1_gfblup_validate_all_6001_7297.rds")
+
+# ------------------------------------------------------------------------------
diff --git a/code/using_GO/pla/T15_2_1001_2000.R b/code/using_GO/pla/T15_2_1001_2000.R
new file mode 100644
index 0000000000000000000000000000000000000000..708769f34a4d7475092a8ae6730c52a92872c1a9
--- /dev/null
+++ b/code/using_GO/pla/T15_2_1001_2000.R
@@ -0,0 +1,131 @@
+# ---- Helper functions ----
+# Strip leading and trailing whitespace.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negated form of %in%.
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether package `mypkg` is installed.
+is.installed <- function(mypkg) {
+  mypkg %in% installed.packages()[, 1]
+}
+
+# Output files are written relative to this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+# One-off installation steps, kept for reference:
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+
+# ---- Input data ----
+message("Loading data...")
+MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+# load() restores objects under the names they were saved with.
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+# The objects below are annotated "7297 excluding those go terms with zero markers".
+markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
+setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
+rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
+nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
+nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
+GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
+rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
+# ---- Filter ----
+# Keep GO terms with at least one marker, then take this script's slice.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)][1001:2000]
+rGF_filtered <- rGF[names(go_all_markers_filtered)][1001:2000]
+
+# ---- Cross-validation setup ----
+cycles <- 10
+n_folds <- 8
+# Validation sets are read from the earlier GBLUP run instead of being
+# re-sampled, so no random fold generation happens in this script.
+gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+
+T15_2_gblup_variances_all <- rep(list(list()), cycles)
+T15_2_gblup_prediction_all <- rep(list(list()), cycles)
+T15_2_gfblup_variances_all <- rep(list(list()), cycles)
+T15_2_gfblup_prediction_all <- rep(list(list()), cycles)
+T15_2_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and fixed-effect design matrix do not change between cycles.
+# `pheno_df_pla` and `G` are presumably restored by load() calls earlier in the
+# script -- TODO confirm.
+y <- pheno_df_pla$T15_2
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  message("T15_2...")
+
+  # GBLUP. The handler RETURNS the fallback: assignments made inside a handler
+  # function only affect its own local copies, never the outer result lists.
+  gblup_fit <- tryCatch(
+    list(
+      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    ),
+    error = function(e) {
+      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    }
+  )
+  T15_2_gblup_variances_all[[r]] <- gblup_fit$var
+  T15_2_gblup_prediction_all[[r]] <- gblup_fit$pred
+
+  # GFBLUP: one fit per GO term, pairing GF_filtered[k] with rGF_filtered[k]
+  # (presumably the GO-term GRM and its complement -- TODO confirm).
+  gf_var <- rep(list(list()), length(GF_filtered))
+  gf_pred <- rep(list(list()), length(GF_filtered))
+  names(gf_var) <- names(GF_filtered)
+  names(gf_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
+    # Both entries fall back to list() if either fit fails, so a variance fit
+    # is never kept without its matching prediction.
+    gf_fit <- tryCatch(
+      list(
+        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
+        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
+      ),
+      error = function(e) {
+        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+        list(var = list(), pred = list())
+      }
+    )
+    gf_var[[k]] <- gf_fit$var
+    gf_pred[[k]] <- gf_fit$pred
+  }
+  T15_2_gfblup_variances_all[[r]] <- gf_var
+  T15_2_gfblup_prediction_all[[r]] <- gf_pred
+  T15_2_gfblup_validate_all[[r]] <- list(validate)
+}
+saveRDS(T15_2_gblup_variances_all, "T15_2_gblup_variances_all_1001_2000.rds")
+saveRDS(T15_2_gblup_prediction_all, "T15_2_gblup_prediction_all_1001_2000.rds")
+saveRDS(T15_2_gfblup_variances_all, "T15_2_gfblup_variances_all_1001_2000.rds")
+saveRDS(T15_2_gfblup_prediction_all, "T15_2_gfblup_prediction_all_1001_2000.rds")
+saveRDS(T15_2_gfblup_validate_all, "T15_2_gfblup_validate_all_1001_2000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T15_2_1_1000.R b/code/using_GO/pla/T15_2_1_1000.R
new file mode 100644
index 0000000000000000000000000000000000000000..05caeebbd7b27a9c2726900bd0680ffef4ca35a1
--- /dev/null
+++ b/code/using_GO/pla/T15_2_1_1000.R
@@ -0,0 +1,131 @@
+# ---- Helper functions ----
+# Strip leading and trailing whitespace.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negated form of %in%.
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether package `mypkg` is installed.
+is.installed <- function(mypkg) {
+  mypkg %in% installed.packages()[, 1]
+}
+
+# Output files are written relative to this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+# One-off installation steps, kept for reference:
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+
+# ---- Input data ----
+message("Loading data...")
+data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
+go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
+MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
+accessions <- readRDS(file.path(data_dir, "accessions.rds"))
+all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
+ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
+W <- readRDS(file.path(data_dir, "W.rds"))
+# load() restores objects under their saved names; `G` and `pheno_df_pla`, used
+# below, are presumably provided by these files -- TODO confirm.
+load(file.path(data_dir, "G.Rdata"))
+MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
+load(file.path(data_dir, "Pheno_pla.Rdata"))
+load(file.path(data_dir, "geno_pheno_pla.Rdata"))
+load(file.path(data_dir, "pheno_df_pla.Rdata"))
+go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
+go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
+go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
+# The objects below are annotated "7297 excluding those go terms with zero markers".
+markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
+setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
+rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
+nsets <- readRDS(file.path(go_dir, "nsets.rds"))
+nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
+GF <- readRDS(file.path(go_dir, "GF.rds"))
+rGF <- readRDS(file.path(go_dir, "rGF.rds"))
+
+# ---- Filter ----
+# Keep GO terms with at least one marker, then take this script's slice.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)][1:1000]
+rGF_filtered <- rGF[names(go_all_markers_filtered)][1:1000]
+
+# ---- Cross-validation setup ----
+cycles <- 10
+n_folds <- 8
+# Validation sets are read from the earlier GBLUP run instead of being
+# re-sampled, so no random fold generation happens in this script.
+gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+
+T15_2_gblup_variances_all <- rep(list(list()), cycles)
+T15_2_gblup_prediction_all <- rep(list(list()), cycles)
+T15_2_gfblup_variances_all <- rep(list(list()), cycles)
+T15_2_gfblup_prediction_all <- rep(list(list()), cycles)
+T15_2_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and fixed-effect design matrix do not change between cycles.
+y <- pheno_df_pla$T15_2
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  message("T15_2...")
+
+  # GBLUP. The handler RETURNS the fallback: assignments made inside a handler
+  # function only affect its own local copies, never the outer result lists.
+  gblup_fit <- tryCatch(
+    list(
+      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    ),
+    error = function(e) {
+      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    }
+  )
+  T15_2_gblup_variances_all[[r]] <- gblup_fit$var
+  T15_2_gblup_prediction_all[[r]] <- gblup_fit$pred
+
+  # GFBLUP: one fit per GO term, pairing GF_filtered[k] with rGF_filtered[k]
+  # (presumably the GO-term GRM and its complement -- TODO confirm).
+  gf_var <- rep(list(list()), length(GF_filtered))
+  gf_pred <- rep(list(list()), length(GF_filtered))
+  names(gf_var) <- names(GF_filtered)
+  names(gf_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
+    # Both entries fall back to list() if either fit fails, so a variance fit
+    # is never kept without its matching prediction.
+    gf_fit <- tryCatch(
+      list(
+        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
+        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
+      ),
+      error = function(e) {
+        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+        list(var = list(), pred = list())
+      }
+    )
+    gf_var[[k]] <- gf_fit$var
+    gf_pred[[k]] <- gf_fit$pred
+  }
+  T15_2_gfblup_variances_all[[r]] <- gf_var
+  T15_2_gfblup_prediction_all[[r]] <- gf_pred
+  T15_2_gfblup_validate_all[[r]] <- list(validate)
+}
+saveRDS(T15_2_gblup_variances_all, "T15_2_gblup_variances_all_1_1000.rds")
+saveRDS(T15_2_gblup_prediction_all, "T15_2_gblup_prediction_all_1_1000.rds")
+saveRDS(T15_2_gfblup_variances_all, "T15_2_gfblup_variances_all_1_1000.rds")
+saveRDS(T15_2_gfblup_prediction_all, "T15_2_gfblup_prediction_all_1_1000.rds")
+saveRDS(T15_2_gfblup_validate_all, "T15_2_gfblup_validate_all_1_1000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T15_2_2001_3000.R b/code/using_GO/pla/T15_2_2001_3000.R
new file mode 100644
index 0000000000000000000000000000000000000000..defc89944050f3ff62478d598a6a19acfaa7d45d
--- /dev/null
+++ b/code/using_GO/pla/T15_2_2001_3000.R
@@ -0,0 +1,131 @@
+# ---- Helper functions ----
+# Strip leading and trailing whitespace.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negated form of %in%.
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether package `mypkg` is installed.
+is.installed <- function(mypkg) {
+  mypkg %in% installed.packages()[, 1]
+}
+
+# Output files are written relative to this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+# One-off installation steps, kept for reference:
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
# ---- Input data -------------------------------------------------------------
message("Loading data...")

# Objects stored as .rds are bound to explicit names here.
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# load() restores whatever names were saved in the .Rdata file.
# NOTE(review): presumably this one provides `G`, the GRM used below - confirm.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")

# NOTE(review): presumably the last of these provides `pheno_df_pla` - confirm.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO-term tables; the author's notes give 7432 entries for each of these.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# Per the author's notes: 7297 entries, GO terms with zero markers excluded.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_2_gblup_variances_all=rep(list(list()),cycles) +T15_2_gblup_prediction_all=rep(list(list()),cycles) +T15_2_gfblup_variances_all=rep(list(list()),cycles) +T15_2_gfblup_prediction_all=rep(list(list()),cycles) +T15_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_2...") + y=pheno_df_pla$T15_2 + fm <- y ~ 1*y + 
X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_2_gblup_variances_all[[r]]<-var + T15_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_2_gblup_variances_all[[r]]<-list() + T15_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_2_gfblup_variances_all[[r]]<-var + T15_2_gfblup_prediction_all[[r]]<-pred + T15_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_2_gblup_variances_all,"T15_2_gblup_variances_all_2001_3000.rds") +saveRDS(T15_2_gblup_prediction_all,"T15_2_gblup_prediction_all_2001_3000.rds") +saveRDS(T15_2_gfblup_variances_all,"T15_2_gfblup_variances_all_2001_3000.rds") +saveRDS(T15_2_gfblup_prediction_all,"T15_2_gfblup_prediction_all_2001_3000.rds") +saveRDS(T15_2_gfblup_validate_all,"T15_2_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_2_3001_4000.R 
b/code/using_GO/pla/T15_2_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..4eac70cf215cd3a32c0d5fd6384d519ac7f78719 --- /dev/null +++ b/code/using_GO/pla/T15_2_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
# load() restores whatever names were saved in each .Rdata file.
# NOTE(review): presumably the last of these provides `pheno_df_pla` - confirm.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO-term tables; the author's notes give 7432 entries for each of these.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# Per the author's notes: 7297 entries, GO terms with zero markers excluded.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter -------------------------------------------------------------------
# Keep GO terms with a positive marker count, look their entries up by name in
# GF / rGF, then take positions 3001-4000 of that list as this job's share.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][3001:4000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][3001:4000]

+########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_2_gblup_variances_all=rep(list(list()),cycles) +T15_2_gblup_prediction_all=rep(list(list()),cycles) +T15_2_gfblup_variances_all=rep(list(list()),cycles) +T15_2_gfblup_prediction_all=rep(list(list()),cycles) +T15_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_2...") + y=pheno_df_pla$T15_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_2_gblup_variances_all[[r]]<-var + T15_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_2_gblup_variances_all[[r]]<-list() + T15_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { 
+ message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_2_gfblup_variances_all[[r]]<-var + T15_2_gfblup_prediction_all[[r]]<-pred + T15_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_2_gblup_variances_all,"T15_2_gblup_variances_all_3001_4000.rds") +saveRDS(T15_2_gblup_prediction_all,"T15_2_gblup_prediction_all_3001_4000.rds") +saveRDS(T15_2_gfblup_variances_all,"T15_2_gfblup_variances_all_3001_4000.rds") +saveRDS(T15_2_gfblup_prediction_all,"T15_2_gfblup_prediction_all_3001_4000.rds") +saveRDS(T15_2_gfblup_validate_all,"T15_2_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_2_4001_5000.R b/code/using_GO/pla/T15_2_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..fbac682d365b3245988e5dff7428e42c48f87e0d --- /dev/null +++ b/code/using_GO/pla/T15_2_4001_5000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if 
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers 
# Per the author's notes: 7297 entries, GO terms with zero markers excluded.
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter -------------------------------------------------------------------
# Keep GO terms with a positive marker count, look their entries up by name in
# GF / rGF, then take positions 4001-5000 of that list as this job's share.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]

# ---- Cross-validation set-up ----------------------------------------------------
cycles <- 10
# Validation sets are read from disk rather than sampled here, one entry per
# cycle. NOTE(review): the path suggests they were written by the plain GBLUP
# job so both analyses share folds - confirm.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Fail before any model is fitted if the saved folds cannot cover every cycle;
# otherwise the `[[r]]` lookup would only fail after earlier cycles have run.
stopifnot(length(gblup_validate) >= cycles)

# One slot per cycle; a slot stays an empty list when its fit fails.
T15_2_gblup_variances_all <- rep(list(list()), cycles)
T15_2_gblup_prediction_all <- rep(list(list()), cycles)
T15_2_gfblup_variances_all <- rep(list(list()), cycles)
T15_2_gfblup_prediction_all <- rep(list(list()), cycles)
T15_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design matrix do not change between cycles, so
# they are built once instead of on every iteration.
y <- pheno_df_pla$T15_2
# NOTE(review): the response also appears on the right-hand side; `y ~ 1` may
# have been intended - formula left exactly as written.
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)  # not used below; kept so the script leaves `n` as it did before

for (r in seq_len(cycles)) {
  message(paste("cycle#",r,"..."))

  # Rebuild the fold matrix for this cycle from the saved validation sets.
  # NOTE(review): the row count assumes nrow(pheno_df_pla) is a multiple of
  # n_folds, and byrow = TRUE assumes the saved values are stored row-wise;
  # if a column-major matrix was saved, this regroups the individuals into
  # different folds - verify against the script that wrote the file.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message ("T15_2...")

  # ---- GBLUP: single genomic relationship matrix ------------------------------
  # Two greml() calls on the same model: one without and one with `validate`.
  # Results are stored only when both calls succeed, as before.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      # Report the failure instead of discarding it; the slots for this cycle
      # keep their empty-list placeholders.
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T15_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T15_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one two-matrix model per GO term -------------------------------
  gf_variances <- rep(list(list()), length(GF_filtered))
  gf_predictions <- rep(list(list()), length(GF_filtered))
  names(gf_variances) <- names(GF_filtered)
  names(gf_predictions) <- names(GF_filtered)

  for (k in seq_along(GF_filtered)) {
    message(paste("GO#",k,"..."))
    # Two-element GRM list: the k-th entry of GF and the k-th entry of rGF.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch(
      {
        gf_variances[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
        gf_predictions[[k]] <- greml(
          y = y, X = X, GRM = grm_k, validate = validate, ncores = 1
        )
      },
      error = function(e) {
        # Whatever was not assigned before the error keeps its empty-list
        # placeholder; the failure is logged so it can be traced afterwards.
        message(
          "GFBLUP failed for GO# ", k, " (", names(GF_filtered)[k],
          ") in cycle ", r, ": ", conditionMessage(e)
        )
      }
    )
  }

  T15_2_gfblup_variances_all[[r]] <- gf_variances
  T15_2_gfblup_prediction_all[[r]] <- gf_predictions
  T15_2_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T15_2_gblup_variances_all, "T15_2_gblup_variances_all_4001_5000.rds")
saveRDS(T15_2_gblup_prediction_all, "T15_2_gblup_prediction_all_4001_5000.rds")
+saveRDS(T15_2_gfblup_variances_all,"T15_2_gfblup_variances_all_4001_5000.rds") +saveRDS(T15_2_gfblup_prediction_all,"T15_2_gfblup_prediction_all_4001_5000.rds") +saveRDS(T15_2_gfblup_validate_all,"T15_2_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_2_5001_6000.R b/code/using_GO/pla/T15_2_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..37ee8d403e63fbf1553443d751fef570433a2957 --- /dev/null +++ b/code/using_GO/pla/T15_2_5001_6000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") 
# Objects stored as .rds are bound to explicit names here.
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# load() restores whatever names were saved in the .Rdata file.
# NOTE(review): presumably this one provides `G`, the GRM used below - confirm.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")

# NOTE(review): presumably the last of these provides `pheno_df_pla` - confirm.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO-term tables; the author's notes give 7432 entries for each of these.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# Per the author's notes: 7297 entries, GO terms with zero markers excluded.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
#7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_2_gblup_variances_all=rep(list(list()),cycles) +T15_2_gblup_prediction_all=rep(list(list()),cycles) +T15_2_gfblup_variances_all=rep(list(list()),cycles) +T15_2_gfblup_prediction_all=rep(list(list()),cycles) +T15_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_2...") + y=pheno_df_pla$T15_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_2_gblup_variances_all[[r]]<-var + 
T15_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_2_gblup_variances_all[[r]]<-list() + T15_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_2_gfblup_variances_all[[r]]<-var + T15_2_gfblup_prediction_all[[r]]<-pred + T15_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_2_gblup_variances_all,"T15_2_gblup_variances_all_5001_6000.rds") +saveRDS(T15_2_gblup_prediction_all,"T15_2_gblup_prediction_all_5001_6000.rds") +saveRDS(T15_2_gfblup_variances_all,"T15_2_gfblup_variances_all_5001_6000.rds") +saveRDS(T15_2_gfblup_prediction_all,"T15_2_gfblup_prediction_all_5001_6000.rds") +saveRDS(T15_2_gfblup_validate_all,"T15_2_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_2_6001_7297.R b/code/using_GO/pla/T15_2_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..036845f5ead31c5319ee47db47f73032bb0f1dde --- /dev/null +++ b/code/using_GO/pla/T15_2_6001_7297.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- 
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") 
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[6001:7297] +rGF_filtered<-rGF_filtered[6001:7297] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + 
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_2_gblup_variances_all=rep(list(list()),cycles) +T15_2_gblup_prediction_all=rep(list(list()),cycles) +T15_2_gfblup_variances_all=rep(list(list()),cycles) +T15_2_gfblup_prediction_all=rep(list(list()),cycles) +T15_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_2...") + y=pheno_df_pla$T15_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_2_gblup_variances_all[[r]]<-var + T15_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_2_gblup_variances_all[[r]]<-list() + T15_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_2_gfblup_variances_all[[r]]<-var + T15_2_gfblup_prediction_all[[r]]<-pred + T15_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_2_gblup_variances_all,"T15_2_gblup_variances_all_6001_7297.rds") +saveRDS(T15_2_gblup_prediction_all,"T15_2_gblup_prediction_all_6001_7297.rds") +saveRDS(T15_2_gfblup_variances_all,"T15_2_gfblup_variances_all_6001_7297.rds") +saveRDS(T15_2_gfblup_prediction_all,"T15_2_gfblup_prediction_all_6001_7297.rds") +saveRDS(T15_2_gfblup_validate_all,"T15_2_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_3_1001_2000.R b/code/using_GO/pla/T15_3_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..022bfccf5e4a6e4bcbb78c0f2c5c9eb3ae2753c1 --- /dev/null +++ b/code/using_GO/pla/T15_3_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# 
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
#7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_3_gblup_variances_all=rep(list(list()),cycles) +T15_3_gblup_prediction_all=rep(list(list()),cycles) +T15_3_gfblup_variances_all=rep(list(list()),cycles) +T15_3_gfblup_prediction_all=rep(list(list()),cycles) +T15_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + 
#validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_3...") + y=pheno_df_pla$T15_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_3_gblup_variances_all[[r]]<-var + T15_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_3_gblup_variances_all[[r]]<-list() + T15_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_3_gfblup_variances_all[[r]]<-var + T15_3_gfblup_prediction_all[[r]]<-pred + T15_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_3_gblup_variances_all,"T15_3_gblup_variances_all_1001_2000.rds") +saveRDS(T15_3_gblup_prediction_all,"T15_3_gblup_prediction_all_1001_2000.rds") +saveRDS(T15_3_gfblup_variances_all,"T15_3_gfblup_variances_all_1001_2000.rds") +saveRDS(T15_3_gfblup_prediction_all,"T15_3_gfblup_prediction_all_1001_2000.rds") 
+saveRDS(T15_3_gfblup_validate_all,"T15_3_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_3_1_1000.R b/code/using_GO/pla/T15_3_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..28741e3f334cedc041242be463e56baea005544f --- /dev/null +++ b/code/using_GO/pla/T15_3_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") 
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) 
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_3_gblup_variances_all=rep(list(list()),cycles) +T15_3_gblup_prediction_all=rep(list(list()),cycles) +T15_3_gfblup_variances_all=rep(list(list()),cycles) +T15_3_gfblup_prediction_all=rep(list(list()),cycles) +T15_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_3...") + y=pheno_df_pla$T15_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_3_gblup_variances_all[[r]]<-var + T15_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_3_gblup_variances_all[[r]]<-list() + T15_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_3_gfblup_variances_all[[r]]<-var + T15_3_gfblup_prediction_all[[r]]<-pred + T15_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_3_gblup_variances_all,"T15_3_gblup_variances_all_1_1000.rds") +saveRDS(T15_3_gblup_prediction_all,"T15_3_gblup_prediction_all_1_1000.rds") +saveRDS(T15_3_gfblup_variances_all,"T15_3_gfblup_variances_all_1_1000.rds") +saveRDS(T15_3_gfblup_prediction_all,"T15_3_gfblup_prediction_all_1_1000.rds") +saveRDS(T15_3_gfblup_validate_all,"T15_3_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_3_2001_3000.R b/code/using_GO/pla/T15_3_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..06b102d5dab10e0f8c172a0dd21c90446dbd1e63 --- /dev/null +++ b/code/using_GO/pla/T15_3_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, 
installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_3_gblup_variances_all=rep(list(list()),cycles) +T15_3_gblup_prediction_all=rep(list(list()),cycles) +T15_3_gfblup_variances_all=rep(list(list()),cycles) 
+T15_3_gfblup_prediction_all=rep(list(list()),cycles) +T15_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_3...") + y=pheno_df_pla$T15_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_3_gblup_variances_all[[r]]<-var + T15_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_3_gblup_variances_all[[r]]<-list() + T15_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_3_gfblup_variances_all[[r]]<-var + T15_3_gfblup_prediction_all[[r]]<-pred + 
T15_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_3_gblup_variances_all,"T15_3_gblup_variances_all_2001_3000.rds") +saveRDS(T15_3_gblup_prediction_all,"T15_3_gblup_prediction_all_2001_3000.rds") +saveRDS(T15_3_gfblup_variances_all,"T15_3_gfblup_variances_all_2001_3000.rds") +saveRDS(T15_3_gfblup_prediction_all,"T15_3_gfblup_prediction_all_2001_3000.rds") +saveRDS(T15_3_gfblup_validate_all,"T15_3_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_3_3001_4000.R b/code/using_GO/pla/T15_3_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..24a12e53a1e9ed21ad24ed9676386824c2f71e3d --- /dev/null +++ b/code/using_GO/pla/T15_3_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") 
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero 
markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_3_gblup_variances_all=rep(list(list()),cycles) +T15_3_gblup_prediction_all=rep(list(list()),cycles) +T15_3_gfblup_variances_all=rep(list(list()),cycles) +T15_3_gfblup_prediction_all=rep(list(list()),cycles) +T15_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_3...") + y=pheno_df_pla$T15_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_3_gblup_variances_all[[r]]<-var + T15_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_3_gblup_variances_all[[r]]<-list() + T15_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_3_gfblup_variances_all[[r]]<-var + T15_3_gfblup_prediction_all[[r]]<-pred + T15_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_3_gblup_variances_all,"T15_3_gblup_variances_all_3001_4000.rds") +saveRDS(T15_3_gblup_prediction_all,"T15_3_gblup_prediction_all_3001_4000.rds") +saveRDS(T15_3_gfblup_variances_all,"T15_3_gfblup_variances_all_3001_4000.rds") +saveRDS(T15_3_gfblup_prediction_all,"T15_3_gfblup_prediction_all_3001_4000.rds") +saveRDS(T15_3_gfblup_validate_all,"T15_3_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_3_4001_5000.R b/code/using_GO/pla/T15_3_4001_5000.R new 
file mode 100644 index 0000000000000000000000000000000000000000..ab79059eebe8705c29852e7cc7d0a9c8b6cdd421 --- /dev/null +++ b/code/using_GO/pla/T15_3_4001_5000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] + 
+########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_3_gblup_variances_all=rep(list(list()),cycles) +T15_3_gblup_prediction_all=rep(list(list()),cycles) +T15_3_gfblup_variances_all=rep(list(list()),cycles) +T15_3_gfblup_prediction_all=rep(list(list()),cycles) +T15_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_3...") + y=pheno_df_pla$T15_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_3_gblup_variances_all[[r]]<-var + T15_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_3_gblup_variances_all[[r]]<-list() + T15_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { 
+ message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_3_gfblup_variances_all[[r]]<-var + T15_3_gfblup_prediction_all[[r]]<-pred + T15_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_3_gblup_variances_all,"T15_3_gblup_variances_all_4001_5000.rds") +saveRDS(T15_3_gblup_prediction_all,"T15_3_gblup_prediction_all_4001_5000.rds") +saveRDS(T15_3_gfblup_variances_all,"T15_3_gfblup_variances_all_4001_5000.rds") +saveRDS(T15_3_gfblup_prediction_all,"T15_3_gfblup_prediction_all_4001_5000.rds") +saveRDS(T15_3_gfblup_validate_all,"T15_3_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_3_5001_6000.R b/code/using_GO/pla/T15_3_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..40ca42f2d79926656b932cc0bfc01a9e57bd93aa --- /dev/null +++ b/code/using_GO/pla/T15_3_5001_6000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if 
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers 
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_3_gblup_variances_all=rep(list(list()),cycles) +T15_3_gblup_prediction_all=rep(list(list()),cycles) +T15_3_gfblup_variances_all=rep(list(list()),cycles) +T15_3_gfblup_prediction_all=rep(list(list()),cycles) +T15_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = 
nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_3...") + y=pheno_df_pla$T15_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_3_gblup_variances_all[[r]]<-var + T15_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_3_gblup_variances_all[[r]]<-list() + T15_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_3_gfblup_variances_all[[r]]<-var + T15_3_gfblup_prediction_all[[r]]<-pred + T15_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_3_gblup_variances_all,"T15_3_gblup_variances_all_5001_6000.rds") +saveRDS(T15_3_gblup_prediction_all,"T15_3_gblup_prediction_all_5001_6000.rds") 
+saveRDS(T15_3_gfblup_variances_all,"T15_3_gfblup_variances_all_5001_6000.rds") +saveRDS(T15_3_gfblup_prediction_all,"T15_3_gfblup_prediction_all_5001_6000.rds") +saveRDS(T15_3_gfblup_validate_all,"T15_3_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_3_6001_7297.R b/code/using_GO/pla/T15_3_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..b9945bc0bf7005d4d2112c2b8e048de596216dca --- /dev/null +++ b/code/using_GO/pla/T15_3_6001_7297.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") 
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") 
#7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[6001:7297] +rGF_filtered<-rGF_filtered[6001:7297] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_3_gblup_variances_all=rep(list(list()),cycles) +T15_3_gblup_prediction_all=rep(list(list()),cycles) +T15_3_gfblup_variances_all=rep(list(list()),cycles) +T15_3_gfblup_prediction_all=rep(list(list()),cycles) +T15_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_3...") + y=pheno_df_pla$T15_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_3_gblup_variances_all[[r]]<-var + 
T15_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_3_gblup_variances_all[[r]]<-list() + T15_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_3_gfblup_variances_all[[r]]<-var + T15_3_gfblup_prediction_all[[r]]<-pred + T15_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_3_gblup_variances_all,"T15_3_gblup_variances_all_6001_7297.rds") +saveRDS(T15_3_gblup_prediction_all,"T15_3_gblup_prediction_all_6001_7297.rds") +saveRDS(T15_3_gfblup_variances_all,"T15_3_gfblup_variances_all_6001_7297.rds") +saveRDS(T15_3_gfblup_prediction_all,"T15_3_gfblup_prediction_all_6001_7297.rds") +saveRDS(T15_3_gfblup_validate_all,"T15_3_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_4_1001_2000.R b/code/using_GO/pla/T15_4_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..2b4d312fea5e0267b044a7df30dd202307e47966 --- /dev/null +++ b/code/using_GO/pla/T15_4_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- 
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") 
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + 
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_4_gblup_variances_all=rep(list(list()),cycles) +T15_4_gblup_prediction_all=rep(list(list()),cycles) +T15_4_gfblup_variances_all=rep(list(list()),cycles) +T15_4_gfblup_prediction_all=rep(list(list()),cycles) +T15_4_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_4...") + y=pheno_df_pla$T15_4 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_4_gblup_variances_all[[r]]<-var + T15_4_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_4_gblup_variances_all[[r]]<-list() + T15_4_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_4_gfblup_variances_all[[r]]<-var + T15_4_gfblup_prediction_all[[r]]<-pred + T15_4_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_4_gblup_variances_all,"T15_4_gblup_variances_all_1001_2000.rds") +saveRDS(T15_4_gblup_prediction_all,"T15_4_gblup_prediction_all_1001_2000.rds") +saveRDS(T15_4_gfblup_variances_all,"T15_4_gfblup_variances_all_1001_2000.rds") +saveRDS(T15_4_gfblup_prediction_all,"T15_4_gfblup_prediction_all_1001_2000.rds") +saveRDS(T15_4_gfblup_validate_all,"T15_4_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_4_1_1000.R b/code/using_GO/pla/T15_4_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..056cb6baaae5c119f25d8db3109afcae27317414 --- /dev/null +++ b/code/using_GO/pla/T15_4_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") 
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter: keep GO terms that have at least one marker, then take this script's slice of them
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[1:1000]
+rGF_filtered<-rGF_filtered[1:1000]
+
+###########################################################################################
+cycles=10; # number of repeats; gblup_validate[[r]] supplies the validation sets for repeat r
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T15_4_gblup_variances_all=rep(list(list()),cycles)
+T15_4_gblup_prediction_all=rep(list(list()),cycles)
+T15_4_gfblup_variances_all=rep(list(list()),cycles)
+T15_4_gfblup_prediction_all=rep(list(list()),cycles)
+T15_4_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Folds are not re-drawn here: each cycle reuses the validation sets read from
+  # gblup_validate_all.rds, so the GBLUP and GFBLUP fits below share one partition per cycle.
+  # NOTE(review): folds are saved below as list(validate); if gblup_validate[[r]] was stored the same way, byrow=TRUE reorders fold membership - confirm.
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T15_4...")
+  y=pheno_df_pla$T15_4
+  # NOTE(review): the response also appears on the right-hand side; presumably X is meant to be intercept-only - confirm.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # tryCatch returns either the fitted pair or the handler's fallback; assigning inside the
+  # handler would only modify copies local to that function, so results are stored afterwards.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G),ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  }) #try catch ends
+  T15_4_gblup_variances_all[[r]]<-gblup_fit$var
+  T15_4_gblup_prediction_all[[r]]<-gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  # One model per list position k, using GF_filtered[k] and rGF_filtered[k] together as the GRM list.
+  for(k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k])
+    gf_fit <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_k,ncores=1),
+           pred = greml(y = y, X = X, GRM = grm_k, validate = validate,ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    }) #try catch ends
+    var[[k]] <- gf_fit$var
+    pred[[k]] <- gf_fit$pred
+  }
+  T15_4_gfblup_variances_all[[r]]<-var
+  T15_4_gfblup_prediction_all[[r]]<-pred
+  T15_4_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T15_4_gblup_variances_all,"T15_4_gblup_variances_all_1_1000.rds")
+saveRDS(T15_4_gblup_prediction_all,"T15_4_gblup_prediction_all_1_1000.rds")
+saveRDS(T15_4_gfblup_variances_all,"T15_4_gfblup_variances_all_1_1000.rds")
+saveRDS(T15_4_gfblup_prediction_all,"T15_4_gfblup_prediction_all_1_1000.rds")
+saveRDS(T15_4_gfblup_validate_all,"T15_4_gfblup_validate_all_1_1000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T15_4_2001_3000.R b/code/using_GO/pla/T15_4_2001_3000.R
new file mode 100644
index 0000000000000000000000000000000000000000..8e4e371b371a07bdd3aa66e6487f7f01debad932
--- /dev/null
+++ b/code/using_GO/pla/T15_4_2001_3000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter: keep GO terms that have at least one marker, then take this script's slice of them
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[2001:3000]
+rGF_filtered<-rGF_filtered[2001:3000]
+
+###########################################################################################
+cycles=10; # number of repeats; gblup_validate[[r]] supplies the validation sets for repeat r
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T15_4_gblup_variances_all=rep(list(list()),cycles)
+T15_4_gblup_prediction_all=rep(list(list()),cycles)
+T15_4_gfblup_variances_all=rep(list(list()),cycles)
+T15_4_gfblup_prediction_all=rep(list(list()),cycles)
+T15_4_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Folds are not re-drawn here: each cycle reuses the validation sets read from
+  # gblup_validate_all.rds, so the GBLUP and GFBLUP fits below share one partition per cycle.
+  # NOTE(review): folds are saved below as list(validate); if gblup_validate[[r]] was stored the same way, byrow=TRUE reorders fold membership - confirm.
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T15_4...")
+  y=pheno_df_pla$T15_4
+  # NOTE(review): the response also appears on the right-hand side; presumably X is meant to be intercept-only - confirm.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # tryCatch returns either the fitted pair or the handler's fallback; assigning inside the
+  # handler would only modify copies local to that function, so results are stored afterwards.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G),ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  }) #try catch ends
+  T15_4_gblup_variances_all[[r]]<-gblup_fit$var
+  T15_4_gblup_prediction_all[[r]]<-gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  # One model per list position k, using GF_filtered[k] and rGF_filtered[k] together as the GRM list.
+  for(k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k])
+    gf_fit <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_k,ncores=1),
+           pred = greml(y = y, X = X, GRM = grm_k, validate = validate,ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    }) #try catch ends
+    var[[k]] <- gf_fit$var
+    pred[[k]] <- gf_fit$pred
+  }
+  T15_4_gfblup_variances_all[[r]]<-var
+  T15_4_gfblup_prediction_all[[r]]<-pred
+  T15_4_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T15_4_gblup_variances_all,"T15_4_gblup_variances_all_2001_3000.rds")
+saveRDS(T15_4_gblup_prediction_all,"T15_4_gblup_prediction_all_2001_3000.rds")
+saveRDS(T15_4_gfblup_variances_all,"T15_4_gfblup_variances_all_2001_3000.rds")
+saveRDS(T15_4_gfblup_prediction_all,"T15_4_gfblup_prediction_all_2001_3000.rds")
+saveRDS(T15_4_gfblup_validate_all,"T15_4_gfblup_validate_all_2001_3000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T15_4_3001_4000.R b/code/using_GO/pla/T15_4_3001_4000.R
new file mode 100644
index 0000000000000000000000000000000000000000..21bb254968025c8612e2f389df283c5c7313d6cf
--- /dev/null
+++ b/code/using_GO/pla/T15_4_3001_4000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter: keep GO terms that have at least one marker, then take this script's slice of them
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[3001:4000]
+rGF_filtered<-rGF_filtered[3001:4000]
+
+###########################################################################################
+cycles=10; # number of repeats; gblup_validate[[r]] supplies the validation sets for repeat r
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T15_4_gblup_variances_all=rep(list(list()),cycles)
+T15_4_gblup_prediction_all=rep(list(list()),cycles)
+T15_4_gfblup_variances_all=rep(list(list()),cycles)
+T15_4_gfblup_prediction_all=rep(list(list()),cycles)
+T15_4_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Folds are not re-drawn here: each cycle reuses the validation sets read from
+  # gblup_validate_all.rds, so the GBLUP and GFBLUP fits below share one partition per cycle.
+  # NOTE(review): folds are saved below as list(validate); if gblup_validate[[r]] was stored the same way, byrow=TRUE reorders fold membership - confirm.
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T15_4...")
+  y=pheno_df_pla$T15_4
+  # NOTE(review): the response also appears on the right-hand side; presumably X is meant to be intercept-only - confirm.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # tryCatch returns either the fitted pair or the handler's fallback; assigning inside the
+  # handler would only modify copies local to that function, so results are stored afterwards.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G),ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  }) #try catch ends
+  T15_4_gblup_variances_all[[r]]<-gblup_fit$var
+  T15_4_gblup_prediction_all[[r]]<-gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  # One model per list position k, using GF_filtered[k] and rGF_filtered[k] together as the GRM list.
+  for(k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k])
+    gf_fit <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_k,ncores=1),
+           pred = greml(y = y, X = X, GRM = grm_k, validate = validate,ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    }) #try catch ends
+    var[[k]] <- gf_fit$var
+    pred[[k]] <- gf_fit$pred
+  }
+  T15_4_gfblup_variances_all[[r]]<-var
+  T15_4_gfblup_prediction_all[[r]]<-pred
+  T15_4_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T15_4_gblup_variances_all,"T15_4_gblup_variances_all_3001_4000.rds")
+saveRDS(T15_4_gblup_prediction_all,"T15_4_gblup_prediction_all_3001_4000.rds")
+saveRDS(T15_4_gfblup_variances_all,"T15_4_gfblup_variances_all_3001_4000.rds")
+saveRDS(T15_4_gfblup_prediction_all,"T15_4_gfblup_prediction_all_3001_4000.rds")
+saveRDS(T15_4_gfblup_validate_all,"T15_4_gfblup_validate_all_3001_4000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T15_4_4001_5000.R b/code/using_GO/pla/T15_4_4001_5000.R
new file mode 100644
index 0000000000000000000000000000000000000000..37fedac900e6bfeee13e3c6590469694f2eb396e
--- /dev/null
+++ b/code/using_GO/pla/T15_4_4001_5000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter: keep GO terms that have at least one marker, then take this script's slice of them
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[4001:5000]
+rGF_filtered<-rGF_filtered[4001:5000]
+
+###########################################################################################
+cycles=10; # number of repeats; gblup_validate[[r]] supplies the validation sets for repeat r
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T15_4_gblup_variances_all=rep(list(list()),cycles)
+T15_4_gblup_prediction_all=rep(list(list()),cycles)
+T15_4_gfblup_variances_all=rep(list(list()),cycles)
+T15_4_gfblup_prediction_all=rep(list(list()),cycles)
+T15_4_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Folds are not re-drawn here: each cycle reuses the validation sets read from
+  # gblup_validate_all.rds, so the GBLUP and GFBLUP fits below share one partition per cycle.
+  # NOTE(review): folds are saved below as list(validate); if gblup_validate[[r]] was stored the same way, byrow=TRUE reorders fold membership - confirm.
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T15_4...")
+  y=pheno_df_pla$T15_4
+  # NOTE(review): the response also appears on the right-hand side; presumably X is meant to be intercept-only - confirm.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # tryCatch returns either the fitted pair or the handler's fallback; assigning inside the
+  # handler would only modify copies local to that function, so results are stored afterwards.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G),ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  }) #try catch ends
+  T15_4_gblup_variances_all[[r]]<-gblup_fit$var
+  T15_4_gblup_prediction_all[[r]]<-gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  # One model per list position k, using GF_filtered[k] and rGF_filtered[k] together as the GRM list.
+  for(k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k])
+    gf_fit <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_k,ncores=1),
+           pred = greml(y = y, X = X, GRM = grm_k, validate = validate,ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    }) #try catch ends
+    var[[k]] <- gf_fit$var
+    pred[[k]] <- gf_fit$pred
+  }
+  T15_4_gfblup_variances_all[[r]]<-var
+  T15_4_gfblup_prediction_all[[r]]<-pred
+  T15_4_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T15_4_gblup_variances_all,"T15_4_gblup_variances_all_4001_5000.rds")
+saveRDS(T15_4_gblup_prediction_all,"T15_4_gblup_prediction_all_4001_5000.rds")
+saveRDS(T15_4_gfblup_variances_all,"T15_4_gfblup_variances_all_4001_5000.rds")
+saveRDS(T15_4_gfblup_prediction_all,"T15_4_gfblup_prediction_all_4001_5000.rds")
+saveRDS(T15_4_gfblup_validate_all,"T15_4_gfblup_validate_all_4001_5000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T15_4_5001_6000.R b/code/using_GO/pla/T15_4_5001_6000.R
new file mode 100644
index 0000000000000000000000000000000000000000..5c474ab1ef70f1bda21c3924d7f04df6b62a98c1
--- /dev/null
+++ b/code/using_GO/pla/T15_4_5001_6000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter: keep GO terms that have at least one marker, then take this script's slice of them
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[5001:6000]
+rGF_filtered<-rGF_filtered[5001:6000]
+
+###########################################################################################
+cycles=10; # number of repeats; gblup_validate[[r]] supplies the validation sets for repeat r
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T15_4_gblup_variances_all=rep(list(list()),cycles)
+T15_4_gblup_prediction_all=rep(list(list()),cycles)
+T15_4_gfblup_variances_all=rep(list(list()),cycles)
+T15_4_gfblup_prediction_all=rep(list(list()),cycles)
+T15_4_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Folds are not re-drawn here: each cycle reuses the validation sets read from
+  # gblup_validate_all.rds, so the GBLUP and GFBLUP fits below share one partition per cycle.
+  # NOTE(review): folds are saved below as list(validate); if gblup_validate[[r]] was stored the same way, byrow=TRUE reorders fold membership - confirm.
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T15_4...")
+  y=pheno_df_pla$T15_4
+  # NOTE(review): the response also appears on the right-hand side; presumably X is meant to be intercept-only - confirm.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # tryCatch returns either the fitted pair or the handler's fallback; assigning inside the
+  # handler would only modify copies local to that function, so results are stored afterwards.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G),ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  }) #try catch ends
+  T15_4_gblup_variances_all[[r]]<-gblup_fit$var
+  T15_4_gblup_prediction_all[[r]]<-gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  # One model per list position k, using GF_filtered[k] and rGF_filtered[k] together as the GRM list.
+  for(k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k])
+    gf_fit <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_k,ncores=1),
+           pred = greml(y = y, X = X, GRM = grm_k, validate = validate,ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    }) #try catch ends
+    var[[k]] <- gf_fit$var
+    pred[[k]] <- gf_fit$pred
+  }
+  T15_4_gfblup_variances_all[[r]]<-var
+  T15_4_gfblup_prediction_all[[r]]<-pred
+  T15_4_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T15_4_gblup_variances_all,"T15_4_gblup_variances_all_5001_6000.rds")
+saveRDS(T15_4_gblup_prediction_all,"T15_4_gblup_prediction_all_5001_6000.rds")
+saveRDS(T15_4_gfblup_variances_all,"T15_4_gfblup_variances_all_5001_6000.rds")
+saveRDS(T15_4_gfblup_prediction_all,"T15_4_gfblup_prediction_all_5001_6000.rds")
+saveRDS(T15_4_gfblup_validate_all,"T15_4_gfblup_validate_all_5001_6000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T15_4_6001_7297.R b/code/using_GO/pla/T15_4_6001_7297.R
new file mode 100644
index 0000000000000000000000000000000000000000..6b4e58e333e038727e2542acabeba14bc1eba34c
--- /dev/null
+++ b/code/using_GO/pla/T15_4_6001_7297.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter: keep GO terms that have at least one marker, then take this script's slice of them
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[6001:7297]
+rGF_filtered<-rGF_filtered[6001:7297]
+
+###########################################################################################
+cycles=10; # number of repeats; gblup_validate[[r]] supplies the validation sets for repeat r
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T15_4_gblup_variances_all=rep(list(list()),cycles)
+T15_4_gblup_prediction_all=rep(list(list()),cycles)
+T15_4_gfblup_variances_all=rep(list(list()),cycles)
+T15_4_gfblup_prediction_all=rep(list(list()),cycles)
+T15_4_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Folds are not re-drawn here: each cycle reuses the validation sets read from
+  # gblup_validate_all.rds, so the GBLUP and GFBLUP fits below share one partition per cycle.
+  # NOTE(review): folds are saved below as list(validate); if gblup_validate[[r]] was stored the same way, byrow=TRUE reorders fold membership - confirm.
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T15_4...")
+  y=pheno_df_pla$T15_4
+  # NOTE(review): the response also appears on the right-hand side; presumably X is meant to be intercept-only - confirm.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # tryCatch returns either the fitted pair or the handler's fallback; assigning inside the
+  # handler would only modify copies local to that function, so results are stored afterwards.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G),ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  }) #try catch ends
+  T15_4_gblup_variances_all[[r]]<-gblup_fit$var
+  T15_4_gblup_prediction_all[[r]]<-gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  # One model per list position k, using GF_filtered[k] and rGF_filtered[k] together as the GRM list.
+  for(k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k])
+    gf_fit <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_k,ncores=1),
+           pred = greml(y = y, X = X, GRM = grm_k, validate = validate,ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    }) #try catch ends
+    var[[k]] <- gf_fit$var
+    pred[[k]] <- gf_fit$pred
+  }
+  T15_4_gfblup_variances_all[[r]]<-var
+  T15_4_gfblup_prediction_all[[r]]<-pred
+  T15_4_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T15_4_gblup_variances_all,"T15_4_gblup_variances_all_6001_7297.rds")
+saveRDS(T15_4_gblup_prediction_all,"T15_4_gblup_prediction_all_6001_7297.rds")
+saveRDS(T15_4_gfblup_variances_all,"T15_4_gfblup_variances_all_6001_7297.rds")
+saveRDS(T15_4_gfblup_prediction_all,"T15_4_gfblup_prediction_all_6001_7297.rds")
+saveRDS(T15_4_gfblup_validate_all,"T15_4_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_5_1001_2000.R b/code/using_GO/pla/T15_5_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..f2ffdac3ecde5de2d6fb2c2d7a73627963c8b401 --- /dev/null +++ b/code/using_GO/pla/T15_5_1001_2000.R @@ -0,0 +1,131 @@
# Runs greml() for phenotype column T15_5: once with the matrix G (GBLUP
# section) and once per GO term in slice 1001:2000 of GF/rGF (GFBLUP section),
# for `cycles` sets of saved validation folds, and saves the collected fits.

# ---- Helper functions ----
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# TRUE when `mypkg` is listed by installed.packages().
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Bootstrap recipe the author kept commented out, for reference:
#   install.packages("backports"); install.packages("devtools")
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")

message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data ----
# NOTE(review): below, only go_all_markers_number, go_all_markers, GF, rGF and
# the objects G / pheno_df_pla (presumably restored by the load() calls) are
# referenced; the other objects are still loaded to keep the workspace as before.
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# Author's note on the next three objects: 7432 entries.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# Author's note on the remaining objects: 7297 entries, GO terms with zero
# markers excluded.
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Keep GO terms that have markers, then this job's slice ----
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][1001:2000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][1001:2000]

# ---- Validation folds and result containers ----
cycles <- 10
# Fold indices are read from the GBLUP run's saved file rather than redrawn, so
# the former time-seeded set.seed()/sample() draw (never read) was removed.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8
fold_rows <- round(nrow(pheno_df_pla) / n_folds)

T15_5_gblup_variances_all <- rep(list(list()), cycles)
T15_5_gblup_prediction_all <- rep(list(list()), cycles)
T15_5_gfblup_variances_all <- rep(list(list()), cycles)
T15_5_gfblup_prediction_all <- rep(list(list()), cycles)
T15_5_gfblup_validate_all <- rep(list(list()), cycles)

# Fit greml() twice for one GRM list: without and with the validation folds.
# Returns list(var = <fit without validate>, pred = <fit with validate>).
# If either call fails, the failure is reported and BOTH entries are empty
# lists. (The original handlers assigned inside function(e), which only changed
# copies local to the handler and reported nothing.)
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("greml failed for ", label, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T15_5
# NOTE(review): formula kept exactly as written; the response also appears on
# the right-hand side -- confirm an intercept-only design (y ~ 1) was intended.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): byrow = TRUE kept from the original; confirm it matches how
  # the folds were flattened when gblup_validate_all.rds was written.
  validate <- matrix(unlist(gblup_validate[[r]]), nrow = fold_rows, byrow = TRUE)
  message("T15_5...")

  # GBLUP: single relationship matrix G.
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate, "GBLUP")
  T15_5_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T15_5_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  # GFBLUP: one fit per GO term, pairing GF_filtered[k] with rGF_filtered[k].
  variances <- rep(list(list()), length(GF_filtered))
  predictions <- rep(list(list()), length(GF_filtered))
  names(variances) <- names(GF_filtered)
  names(predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    go_fit <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate, paste("GO#", k)
    )
    variances[[k]] <- go_fit[["var"]]
    predictions[[k]] <- go_fit[["pred"]]
  }
  T15_5_gfblup_variances_all[[r]] <- variances
  T15_5_gfblup_prediction_all[[r]] <- predictions
  T15_5_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T15_5_gblup_variances_all, "T15_5_gblup_variances_all_1001_2000.rds")
saveRDS(T15_5_gblup_prediction_all, "T15_5_gblup_prediction_all_1001_2000.rds")
saveRDS(T15_5_gfblup_variances_all, "T15_5_gfblup_variances_all_1001_2000.rds")
saveRDS(T15_5_gfblup_prediction_all, "T15_5_gfblup_prediction_all_1001_2000.rds")
saveRDS(T15_5_gfblup_validate_all, "T15_5_gfblup_validate_all_1001_2000.rds")

diff --git a/code/using_GO/pla/T15_5_1_1000.R b/code/using_GO/pla/T15_5_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..566bf793b50a925c90646d481fded38fda28f29e --- /dev/null +++ b/code/using_GO/pla/T15_5_1_1000.R @@ -0,0 +1,131 @@
# Runs greml() for phenotype column T15_5: once with the matrix G (GBLUP
# section) and once per GO term in slice 1:1000 of GF/rGF (GFBLUP section),
# for `cycles` sets of saved validation folds, and saves the collected fits.

# ---- Helper functions ----
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# TRUE when `mypkg` is listed by installed.packages().
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Bootstrap recipe the author kept commented out, for reference:
#   install.packages("backports"); install.packages("devtools")
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")

message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data ----
# NOTE(review): below, only go_all_markers_number, go_all_markers, GF, rGF and
# the objects G / pheno_df_pla (presumably restored by the load() calls) are
# referenced; the other objects are still loaded to keep the workspace as before.
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# Author's note on the next three objects: 7432 entries.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# Author's note on the remaining objects: 7297 entries, GO terms with zero
# markers excluded.
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Keep GO terms that have markers, then this job's slice ----
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][1:1000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][1:1000]

# ---- Validation folds and result containers ----
cycles <- 10
# Fold indices are read from the GBLUP run's saved file rather than redrawn, so
# the former time-seeded set.seed()/sample() draw (never read) was removed.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8
fold_rows <- round(nrow(pheno_df_pla) / n_folds)

T15_5_gblup_variances_all <- rep(list(list()), cycles)
T15_5_gblup_prediction_all <- rep(list(list()), cycles)
T15_5_gfblup_variances_all <- rep(list(list()), cycles)
T15_5_gfblup_prediction_all <- rep(list(list()), cycles)
T15_5_gfblup_validate_all <- rep(list(list()), cycles)

# Fit greml() twice for one GRM list: without and with the validation folds.
# Returns list(var = <fit without validate>, pred = <fit with validate>).
# If either call fails, the failure is reported and BOTH entries are empty
# lists. (The original handlers assigned inside function(e), which only changed
# copies local to the handler and reported nothing.)
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("greml failed for ", label, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T15_5
# NOTE(review): formula kept exactly as written; the response also appears on
# the right-hand side -- confirm an intercept-only design (y ~ 1) was intended.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): byrow = TRUE kept from the original; confirm it matches how
  # the folds were flattened when gblup_validate_all.rds was written.
  validate <- matrix(unlist(gblup_validate[[r]]), nrow = fold_rows, byrow = TRUE)
  message("T15_5...")

  # GBLUP: single relationship matrix G.
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate, "GBLUP")
  T15_5_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T15_5_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  # GFBLUP: one fit per GO term, pairing GF_filtered[k] with rGF_filtered[k].
  variances <- rep(list(list()), length(GF_filtered))
  predictions <- rep(list(list()), length(GF_filtered))
  names(variances) <- names(GF_filtered)
  names(predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    go_fit <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate, paste("GO#", k)
    )
    variances[[k]] <- go_fit[["var"]]
    predictions[[k]] <- go_fit[["pred"]]
  }
  T15_5_gfblup_variances_all[[r]] <- variances
  T15_5_gfblup_prediction_all[[r]] <- predictions
  T15_5_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T15_5_gblup_variances_all, "T15_5_gblup_variances_all_1_1000.rds")
saveRDS(T15_5_gblup_prediction_all, "T15_5_gblup_prediction_all_1_1000.rds")
saveRDS(T15_5_gfblup_variances_all, "T15_5_gfblup_variances_all_1_1000.rds")
saveRDS(T15_5_gfblup_prediction_all, "T15_5_gfblup_prediction_all_1_1000.rds")
saveRDS(T15_5_gfblup_validate_all, "T15_5_gfblup_validate_all_1_1000.rds")

diff --git a/code/using_GO/pla/T15_5_2001_3000.R b/code/using_GO/pla/T15_5_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..d7f1909218a059bcac017150ceead726526ca0d3 --- /dev/null +++ b/code/using_GO/pla/T15_5_2001_3000.R @@ -0,0 +1,131 @@
# Runs greml() for phenotype column T15_5: once with the matrix G (GBLUP
# section) and once per GO term in slice 2001:3000 of GF/rGF (GFBLUP section),
# for `cycles` sets of saved validation folds, and saves the collected fits.

# ---- Helper functions ----
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# TRUE when `mypkg` is listed by installed.packages().
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Bootstrap recipe the author kept commented out, for reference:
#   install.packages("backports"); install.packages("devtools")
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")

message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data ----
# NOTE(review): below, only go_all_markers_number, go_all_markers, GF, rGF and
# the objects G / pheno_df_pla (presumably restored by the load() calls) are
# referenced; the other objects are still loaded to keep the workspace as before.
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# Author's note on the next three objects: 7432 entries.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# Author's note on the remaining objects: 7297 entries, GO terms with zero
# markers excluded.
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Keep GO terms that have markers, then this job's slice ----
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][2001:3000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][2001:3000]

# ---- Validation folds and result containers ----
cycles <- 10
# Fold indices are read from the GBLUP run's saved file rather than redrawn, so
# the former time-seeded set.seed()/sample() draw (never read) was removed.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8
fold_rows <- round(nrow(pheno_df_pla) / n_folds)

T15_5_gblup_variances_all <- rep(list(list()), cycles)
T15_5_gblup_prediction_all <- rep(list(list()), cycles)
T15_5_gfblup_variances_all <- rep(list(list()), cycles)
T15_5_gfblup_prediction_all <- rep(list(list()), cycles)
T15_5_gfblup_validate_all <- rep(list(list()), cycles)

# Fit greml() twice for one GRM list: without and with the validation folds.
# Returns list(var = <fit without validate>, pred = <fit with validate>).
# If either call fails, the failure is reported and BOTH entries are empty
# lists. (The original handlers assigned inside function(e), which only changed
# copies local to the handler and reported nothing.)
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("greml failed for ", label, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T15_5
# NOTE(review): formula kept exactly as written; the response also appears on
# the right-hand side -- confirm an intercept-only design (y ~ 1) was intended.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): byrow = TRUE kept from the original; confirm it matches how
  # the folds were flattened when gblup_validate_all.rds was written.
  validate <- matrix(unlist(gblup_validate[[r]]), nrow = fold_rows, byrow = TRUE)
  message("T15_5...")

  # GBLUP: single relationship matrix G.
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate, "GBLUP")
  T15_5_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T15_5_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  # GFBLUP: one fit per GO term, pairing GF_filtered[k] with rGF_filtered[k].
  variances <- rep(list(list()), length(GF_filtered))
  predictions <- rep(list(list()), length(GF_filtered))
  names(variances) <- names(GF_filtered)
  names(predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    go_fit <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate, paste("GO#", k)
    )
    variances[[k]] <- go_fit[["var"]]
    predictions[[k]] <- go_fit[["pred"]]
  }
  T15_5_gfblup_variances_all[[r]] <- variances
  T15_5_gfblup_prediction_all[[r]] <- predictions
  T15_5_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T15_5_gblup_variances_all, "T15_5_gblup_variances_all_2001_3000.rds")
saveRDS(T15_5_gblup_prediction_all, "T15_5_gblup_prediction_all_2001_3000.rds")
saveRDS(T15_5_gfblup_variances_all, "T15_5_gfblup_variances_all_2001_3000.rds")
saveRDS(T15_5_gfblup_prediction_all, "T15_5_gfblup_prediction_all_2001_3000.rds")
saveRDS(T15_5_gfblup_validate_all, "T15_5_gfblup_validate_all_2001_3000.rds")

diff --git a/code/using_GO/pla/T15_5_3001_4000.R 
b/code/using_GO/pla/T15_5_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..c4dc6480e6da82736a17bfe599761444cc8335c8 --- /dev/null +++ b/code/using_GO/pla/T15_5_3001_4000.R @@ -0,0 +1,131 @@
# Runs greml() for phenotype column T15_5: once with the matrix G (GBLUP
# section) and once per GO term in slice 3001:4000 of GF/rGF (GFBLUP section),
# for `cycles` sets of saved validation folds, and saves the collected fits.

# ---- Helper functions ----
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# TRUE when `mypkg` is listed by installed.packages().
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Bootstrap recipe the author kept commented out, for reference:
#   install.packages("backports"); install.packages("devtools")
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")

message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data ----
# NOTE(review): below, only go_all_markers_number, go_all_markers, GF, rGF and
# the objects G / pheno_df_pla (presumably restored by the load() calls) are
# referenced; the other objects are still loaded to keep the workspace as before.
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# Author's note on the next three objects: 7432 entries.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# Author's note on the remaining objects: 7297 entries, GO terms with zero
# markers excluded.
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Keep GO terms that have markers, then this job's slice ----
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][3001:4000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][3001:4000]

# ---- Validation folds and result containers ----
cycles <- 10
# Fold indices are read from the GBLUP run's saved file rather than redrawn, so
# the former time-seeded set.seed()/sample() draw (never read) was removed.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8
fold_rows <- round(nrow(pheno_df_pla) / n_folds)

T15_5_gblup_variances_all <- rep(list(list()), cycles)
T15_5_gblup_prediction_all <- rep(list(list()), cycles)
T15_5_gfblup_variances_all <- rep(list(list()), cycles)
T15_5_gfblup_prediction_all <- rep(list(list()), cycles)
T15_5_gfblup_validate_all <- rep(list(list()), cycles)

# Fit greml() twice for one GRM list: without and with the validation folds.
# Returns list(var = <fit without validate>, pred = <fit with validate>).
# If either call fails, the failure is reported and BOTH entries are empty
# lists. (The original handlers assigned inside function(e), which only changed
# copies local to the handler and reported nothing.)
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("greml failed for ", label, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T15_5
# NOTE(review): formula kept exactly as written; the response also appears on
# the right-hand side -- confirm an intercept-only design (y ~ 1) was intended.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): byrow = TRUE kept from the original; confirm it matches how
  # the folds were flattened when gblup_validate_all.rds was written.
  validate <- matrix(unlist(gblup_validate[[r]]), nrow = fold_rows, byrow = TRUE)
  message("T15_5...")

  # GBLUP: single relationship matrix G.
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate, "GBLUP")
  T15_5_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T15_5_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  # GFBLUP: one fit per GO term, pairing GF_filtered[k] with rGF_filtered[k].
  variances <- rep(list(list()), length(GF_filtered))
  predictions <- rep(list(list()), length(GF_filtered))
  names(variances) <- names(GF_filtered)
  names(predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    go_fit <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate, paste("GO#", k)
    )
    variances[[k]] <- go_fit[["var"]]
    predictions[[k]] <- go_fit[["pred"]]
  }
  T15_5_gfblup_variances_all[[r]] <- variances
  T15_5_gfblup_prediction_all[[r]] <- predictions
  T15_5_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T15_5_gblup_variances_all, "T15_5_gblup_variances_all_3001_4000.rds")
saveRDS(T15_5_gblup_prediction_all, "T15_5_gblup_prediction_all_3001_4000.rds")
saveRDS(T15_5_gfblup_variances_all, "T15_5_gfblup_variances_all_3001_4000.rds")
saveRDS(T15_5_gfblup_prediction_all, "T15_5_gfblup_prediction_all_3001_4000.rds")
saveRDS(T15_5_gfblup_validate_all, "T15_5_gfblup_validate_all_3001_4000.rds")

diff --git a/code/using_GO/pla/T15_5_4001_5000.R b/code/using_GO/pla/T15_5_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..949ffc13cb427880e415a86f20e3944ffd31de19 --- /dev/null +++ b/code/using_GO/pla/T15_5_4001_5000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if 
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers 
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_5_gblup_variances_all=rep(list(list()),cycles) +T15_5_gblup_prediction_all=rep(list(list()),cycles) +T15_5_gfblup_variances_all=rep(list(list()),cycles) +T15_5_gfblup_prediction_all=rep(list(list()),cycles) +T15_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = 
nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_5...") + y=pheno_df_pla$T15_5 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_5_gblup_variances_all[[r]]<-var + T15_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_5_gblup_variances_all[[r]]<-list() + T15_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_5_gfblup_variances_all[[r]]<-var + T15_5_gfblup_prediction_all[[r]]<-pred + T15_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_5_gblup_variances_all,"T15_5_gblup_variances_all_4001_5000.rds") +saveRDS(T15_5_gblup_prediction_all,"T15_5_gblup_prediction_all_4001_5000.rds") 
+saveRDS(T15_5_gfblup_variances_all,"T15_5_gfblup_variances_all_4001_5000.rds") +saveRDS(T15_5_gfblup_prediction_all,"T15_5_gfblup_prediction_all_4001_5000.rds") +saveRDS(T15_5_gfblup_validate_all,"T15_5_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_5_5001_6000.R b/code/using_GO/pla/T15_5_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..49e9a78765754793557e6a7bfe9dee2826419faf --- /dev/null +++ b/code/using_GO/pla/T15_5_5001_6000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") 
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") 
#7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_5_gblup_variances_all=rep(list(list()),cycles) +T15_5_gblup_prediction_all=rep(list(list()),cycles) +T15_5_gfblup_variances_all=rep(list(list()),cycles) +T15_5_gfblup_prediction_all=rep(list(list()),cycles) +T15_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_5...") + y=pheno_df_pla$T15_5 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_5_gblup_variances_all[[r]]<-var + 
T15_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_5_gblup_variances_all[[r]]<-list() + T15_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_5_gfblup_variances_all[[r]]<-var + T15_5_gfblup_prediction_all[[r]]<-pred + T15_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_5_gblup_variances_all,"T15_5_gblup_variances_all_5001_6000.rds") +saveRDS(T15_5_gblup_prediction_all,"T15_5_gblup_prediction_all_5001_6000.rds") +saveRDS(T15_5_gfblup_variances_all,"T15_5_gfblup_variances_all_5001_6000.rds") +saveRDS(T15_5_gfblup_prediction_all,"T15_5_gfblup_prediction_all_5001_6000.rds") +saveRDS(T15_5_gfblup_validate_all,"T15_5_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_5_6001_7297.R b/code/using_GO/pla/T15_5_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..e93c701367c6b84389979f62dd2274818d917dae --- /dev/null +++ b/code/using_GO/pla/T15_5_6001_7297.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- 
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") 
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[6001:7297] +rGF_filtered<-rGF_filtered[6001:7297] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + 
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_5_gblup_variances_all=rep(list(list()),cycles) +T15_5_gblup_prediction_all=rep(list(list()),cycles) +T15_5_gfblup_variances_all=rep(list(list()),cycles) +T15_5_gfblup_prediction_all=rep(list(list()),cycles) +T15_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_5...") + y=pheno_df_pla$T15_5 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_5_gblup_variances_all[[r]]<-var + T15_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_5_gblup_variances_all[[r]]<-list() + T15_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_5_gfblup_variances_all[[r]]<-var + T15_5_gfblup_prediction_all[[r]]<-pred + T15_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_5_gblup_variances_all,"T15_5_gblup_variances_all_6001_7297.rds") +saveRDS(T15_5_gblup_prediction_all,"T15_5_gblup_prediction_all_6001_7297.rds") +saveRDS(T15_5_gfblup_variances_all,"T15_5_gfblup_variances_all_6001_7297.rds") +saveRDS(T15_5_gfblup_prediction_all,"T15_5_gfblup_prediction_all_6001_7297.rds") +saveRDS(T15_5_gfblup_validate_all,"T15_5_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_6_1001_2000.R b/code/using_GO/pla/T15_6_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..6e615e7b24918bbdbf493ca780f7abbad24f8adc --- /dev/null +++ b/code/using_GO/pla/T15_6_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# 
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
#7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_6_gblup_variances_all=rep(list(list()),cycles) +T15_6_gblup_prediction_all=rep(list(list()),cycles) +T15_6_gfblup_variances_all=rep(list(list()),cycles) +T15_6_gfblup_prediction_all=rep(list(list()),cycles) +T15_6_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + 
#validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_6...") + y=pheno_df_pla$T15_6 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_6_gblup_variances_all[[r]]<-var + T15_6_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_6_gblup_variances_all[[r]]<-list() + T15_6_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_6_gfblup_variances_all[[r]]<-var + T15_6_gfblup_prediction_all[[r]]<-pred + T15_6_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_6_gblup_variances_all,"T15_6_gblup_variances_all_1001_2000.rds") +saveRDS(T15_6_gblup_prediction_all,"T15_6_gblup_prediction_all_1001_2000.rds") +saveRDS(T15_6_gfblup_variances_all,"T15_6_gfblup_variances_all_1001_2000.rds") +saveRDS(T15_6_gfblup_prediction_all,"T15_6_gfblup_prediction_all_1001_2000.rds") 
+saveRDS(T15_6_gfblup_validate_all,"T15_6_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_6_1_1000.R b/code/using_GO/pla/T15_6_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..c302bb2f66b059a531e30ec24b9f6780594529d2 --- /dev/null +++ b/code/using_GO/pla/T15_6_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") 
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) 
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_6_gblup_variances_all=rep(list(list()),cycles) +T15_6_gblup_prediction_all=rep(list(list()),cycles) +T15_6_gfblup_variances_all=rep(list(list()),cycles) +T15_6_gfblup_prediction_all=rep(list(list()),cycles) +T15_6_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_6...") + y=pheno_df_pla$T15_6 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_6_gblup_variances_all[[r]]<-var + T15_6_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_6_gblup_variances_all[[r]]<-list() + T15_6_gblup_prediction_all[[r]]<-list() + }) #try catch ends + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_6_gfblup_variances_all[[r]]<-var + T15_6_gfblup_prediction_all[[r]]<-pred + T15_6_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_6_gblup_variances_all,"T15_6_gblup_variances_all_1_1000.rds") +saveRDS(T15_6_gblup_prediction_all,"T15_6_gblup_prediction_all_1_1000.rds") +saveRDS(T15_6_gfblup_variances_all,"T15_6_gfblup_variances_all_1_1000.rds") +saveRDS(T15_6_gfblup_prediction_all,"T15_6_gfblup_prediction_all_1_1000.rds") +saveRDS(T15_6_gfblup_validate_all,"T15_6_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_6_2001_3000.R b/code/using_GO/pla/T15_6_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..36197e0d6d430137b6ede8061a7470cc55b5781c --- /dev/null +++ b/code/using_GO/pla/T15_6_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, 
installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_6_gblup_variances_all=rep(list(list()),cycles) +T15_6_gblup_prediction_all=rep(list(list()),cycles) +T15_6_gfblup_variances_all=rep(list(list()),cycles) 
+T15_6_gfblup_prediction_all=rep(list(list()),cycles) +T15_6_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_6...") + y=pheno_df_pla$T15_6 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_6_gblup_variances_all[[r]]<-var + T15_6_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_6_gblup_variances_all[[r]]<-list() + T15_6_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_6_gfblup_variances_all[[r]]<-var + T15_6_gfblup_prediction_all[[r]]<-pred + 
T15_6_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_6_gblup_variances_all,"T15_6_gblup_variances_all_2001_3000.rds") +saveRDS(T15_6_gblup_prediction_all,"T15_6_gblup_prediction_all_2001_3000.rds") +saveRDS(T15_6_gfblup_variances_all,"T15_6_gfblup_variances_all_2001_3000.rds") +saveRDS(T15_6_gfblup_prediction_all,"T15_6_gfblup_prediction_all_2001_3000.rds") +saveRDS(T15_6_gfblup_validate_all,"T15_6_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_6_3001_4000.R b/code/using_GO/pla/T15_6_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..42c18753da3f9bb196cd400e25c380ebb02b18cd --- /dev/null +++ b/code/using_GO/pla/T15_6_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") 
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero 
markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_6_gblup_variances_all=rep(list(list()),cycles) +T15_6_gblup_prediction_all=rep(list(list()),cycles) +T15_6_gfblup_variances_all=rep(list(list()),cycles) +T15_6_gfblup_prediction_all=rep(list(list()),cycles) +T15_6_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_6...") + y=pheno_df_pla$T15_6 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_6_gblup_variances_all[[r]]<-var + T15_6_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_6_gblup_variances_all[[r]]<-list() + T15_6_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_6_gfblup_variances_all[[r]]<-var + T15_6_gfblup_prediction_all[[r]]<-pred + T15_6_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_6_gblup_variances_all,"T15_6_gblup_variances_all_3001_4000.rds") +saveRDS(T15_6_gblup_prediction_all,"T15_6_gblup_prediction_all_3001_4000.rds") +saveRDS(T15_6_gfblup_variances_all,"T15_6_gfblup_variances_all_3001_4000.rds") +saveRDS(T15_6_gfblup_prediction_all,"T15_6_gfblup_prediction_all_3001_4000.rds") +saveRDS(T15_6_gfblup_validate_all,"T15_6_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_6_4001_5000.R b/code/using_GO/pla/T15_6_4001_5000.R new 
file mode 100644 index 0000000000000000000000000000000000000000..760db500d36667d0b0bee817708f1cf16b258399 --- /dev/null +++ b/code/using_GO/pla/T15_6_4001_5000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] + 
+########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_6_gblup_variances_all=rep(list(list()),cycles) +T15_6_gblup_prediction_all=rep(list(list()),cycles) +T15_6_gfblup_variances_all=rep(list(list()),cycles) +T15_6_gfblup_prediction_all=rep(list(list()),cycles) +T15_6_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_6...") + y=pheno_df_pla$T15_6 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_6_gblup_variances_all[[r]]<-var + T15_6_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_6_gblup_variances_all[[r]]<-list() + T15_6_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { 
+ message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_6_gfblup_variances_all[[r]]<-var + T15_6_gfblup_prediction_all[[r]]<-pred + T15_6_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_6_gblup_variances_all,"T15_6_gblup_variances_all_4001_5000.rds") +saveRDS(T15_6_gblup_prediction_all,"T15_6_gblup_prediction_all_4001_5000.rds") +saveRDS(T15_6_gfblup_variances_all,"T15_6_gfblup_variances_all_4001_5000.rds") +saveRDS(T15_6_gfblup_prediction_all,"T15_6_gfblup_prediction_all_4001_5000.rds") +saveRDS(T15_6_gfblup_validate_all,"T15_6_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_6_5001_6000.R b/code/using_GO/pla/T15_6_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..b23f2accc26da8c33c7ded1b30bb76bee8a1b512 --- /dev/null +++ b/code/using_GO/pla/T15_6_5001_6000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if 
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers 
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_6_gblup_variances_all=rep(list(list()),cycles) +T15_6_gblup_prediction_all=rep(list(list()),cycles) +T15_6_gfblup_variances_all=rep(list(list()),cycles) +T15_6_gfblup_prediction_all=rep(list(list()),cycles) +T15_6_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = 
nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_6...") + y=pheno_df_pla$T15_6 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_6_gblup_variances_all[[r]]<-var + T15_6_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_6_gblup_variances_all[[r]]<-list() + T15_6_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_6_gfblup_variances_all[[r]]<-var + T15_6_gfblup_prediction_all[[r]]<-pred + T15_6_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_6_gblup_variances_all,"T15_6_gblup_variances_all_5001_6000.rds") +saveRDS(T15_6_gblup_prediction_all,"T15_6_gblup_prediction_all_5001_6000.rds") 
+saveRDS(T15_6_gfblup_variances_all,"T15_6_gfblup_variances_all_5001_6000.rds") +saveRDS(T15_6_gfblup_prediction_all,"T15_6_gfblup_prediction_all_5001_6000.rds") +saveRDS(T15_6_gfblup_validate_all,"T15_6_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_6_6001_7297.R b/code/using_GO/pla/T15_6_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..78c2aeee77540b6ec412dd959a562380c94e7c2d --- /dev/null +++ b/code/using_GO/pla/T15_6_6001_7297.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") 
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") 
# (rGF, loaded just above: 7297 GO terms, excluding those with zero markers)

# ---- Filter: keep GO terms with at least one marker, then take this chunk ----
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[6001:7297]
rGF_filtered <- rGF_filtered[6001:7297]

# ---- Cross-validation setup ----
cycles <- 10
# Validation folds stored by the GBLUP run. They are reused below, so no new
# random folds are drawn in this script.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Result containers: one entry per cycle; an entry stays an empty list when the
# corresponding fit fails.
T15_6_gblup_variances_all <- rep(list(list()), cycles)
T15_6_gblup_prediction_all <- rep(list(list()), cycles)
T15_6_gfblup_variances_all <- rep(list(list()), cycles)
T15_6_gfblup_prediction_all <- rep(list(list()), cycles)
T15_6_gfblup_validate_all <- rep(list(list()), cycles)

# The phenotype vector and design matrix are the same in every cycle, so they
# are built once instead of inside the loop.
y <- pheno_df_pla$T15_6
fm <- y ~ 1 * y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Rebuild this cycle's fold matrix from the stored GBLUP folds.
  # NOTE(review): byrow = TRUE only reproduces the stored folds if they were
  # flattened row-wise; confirm against the script that wrote the file.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T15_6...")

  # ---- GBLUP: GRM list holds only G ----
  # Both fits are collected first and stored only if neither failed. The
  # handler returns NULL instead of assigning, because assignments made inside
  # a handler function never reach the enclosing results.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T15_6_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T15_6_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one fit per GO term in this chunk ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # GRM list built from the k-th entries of GF_filtered and rGF_filtered.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    # On failure both slots keep their empty-list placeholder, so var and pred
    # never disagree about whether term k was fitted.
    if (!is.null(fit_k)) {
      var[[k]] <- fit_k[["var"]]
      pred[[k]] <- fit_k[["pred"]]
    }
  }
  T15_6_gfblup_variances_all[[r]] <- var
  T15_6_gfblup_prediction_all[[r]] <- pred
  T15_6_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Save results (relative to the working directory) ----
saveRDS(T15_6_gblup_variances_all, "T15_6_gblup_variances_all_6001_7297.rds")
saveRDS(T15_6_gblup_prediction_all, "T15_6_gblup_prediction_all_6001_7297.rds")
saveRDS(T15_6_gfblup_variances_all, "T15_6_gfblup_variances_all_6001_7297.rds")
saveRDS(T15_6_gfblup_prediction_all, "T15_6_gfblup_prediction_all_6001_7297.rds")
saveRDS(T15_6_gfblup_validate_all, "T15_6_gfblup_validate_all_6001_7297.rds")

# ------------------------------------------------------------------------------
# ---- Patch header for the next script (retained as comments) ----
# diff --git a/code/using_GO/pla/T15_7_1001_2000.R b/code/using_GO/pla/T15_7_1001_2000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..c0786b9fb272d6e67aee3fcfb40919d03e0cfa08
# --- /dev/null
# +++ b/code/using_GO/pla/T15_7_1001_2000.R
# @@ -0,0 +1,131 @@

# ---- Helper functions ----

# Strip leading and trailing whitespace from each element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negated `%in%`: TRUE where the left-hand value is absent from the right-hand set.
`%not_in%` <- purrr::negate(`%in%`)

# Report whether package `mypkg` appears in the installed-package table.
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

# Working directory for this run; relative file names resolve against it.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Optional one-time installation of qgg from GitHub, left disabled:
# if (!is.installed("backports")) {
#   install.packages("backports")
# }
# if (!is.installed("devtools")) {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Input data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# ---- GO-term inputs ----
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers

# ---- Filter: keep GO terms with at least one marker, then take this chunk ----
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1001:2000]
rGF_filtered <- rGF_filtered[1001:2000]

# ---- Cross-validation setup ----
# Number of cross-validation cycles run by the loop further down.
cycles <- 10
# Validation folds stored by the GBLUP run. They are reused below, so no new
# random folds are drawn in this script.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Result containers: one entry per cycle; an entry stays an empty list when the
# corresponding fit fails.
T15_7_gblup_variances_all <- rep(list(list()), cycles)
T15_7_gblup_prediction_all <- rep(list(list()), cycles)
T15_7_gfblup_variances_all <- rep(list(list()), cycles)
T15_7_gfblup_prediction_all <- rep(list(list()), cycles)
T15_7_gfblup_validate_all <- rep(list(list()), cycles)

# The phenotype vector and design matrix are the same in every cycle, so they
# are built once instead of inside the loop.
y <- pheno_df_pla$T15_7
fm <- y ~ 1 * y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Rebuild this cycle's fold matrix from the stored GBLUP folds.
  # NOTE(review): byrow = TRUE only reproduces the stored folds if they were
  # flattened row-wise; confirm against the script that wrote the file.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T15_7...")

  # ---- GBLUP: GRM list holds only G ----
  # Both fits are collected first and stored only if neither failed. The
  # handler returns NULL instead of assigning, because assignments made inside
  # a handler function never reach the enclosing results.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T15_7_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T15_7_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one fit per GO term in this chunk ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # GRM list built from the k-th entries of GF_filtered and rGF_filtered.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    # On failure both slots keep their empty-list placeholder, so var and pred
    # never disagree about whether term k was fitted.
    if (!is.null(fit_k)) {
      var[[k]] <- fit_k[["var"]]
      pred[[k]] <- fit_k[["pred"]]
    }
  }
  T15_7_gfblup_variances_all[[r]] <- var
  T15_7_gfblup_prediction_all[[r]] <- pred
  T15_7_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Save results (relative to the working directory) ----
saveRDS(T15_7_gblup_variances_all, "T15_7_gblup_variances_all_1001_2000.rds")
saveRDS(T15_7_gblup_prediction_all, "T15_7_gblup_prediction_all_1001_2000.rds")
saveRDS(T15_7_gfblup_variances_all, "T15_7_gfblup_variances_all_1001_2000.rds")
saveRDS(T15_7_gfblup_prediction_all, "T15_7_gfblup_prediction_all_1001_2000.rds")
saveRDS(T15_7_gfblup_validate_all, "T15_7_gfblup_validate_all_1001_2000.rds")

# ------------------------------------------------------------------------------
# ---- Patch header for the next script (retained as comments) ----
# diff --git a/code/using_GO/pla/T15_7_1_1000.R b/code/using_GO/pla/T15_7_1_1000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..86ba4bcdc9c443d548dbc92853d50e09aafdb0cf
# --- /dev/null
# +++ b/code/using_GO/pla/T15_7_1_1000.R
# @@ -0,0 +1,131 @@

# ---- Helper functions ----

# Strip leading and trailing whitespace from each element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negated `%in%`: TRUE where the left-hand value is absent from the right-hand set.
`%not_in%` <- purrr::negate(`%in%`)

# Report whether package `mypkg` appears in the installed-package table.
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

# Working directory for this run; relative file names resolve against it.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Optional one-time installation of qgg from GitHub, left disabled:
# if (!is.installed("backports"))
# {
#   install.packages("backports")
# }
# if (!is.installed("devtools"))
# {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg"))
# {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Input data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers

# ---- Filter: keep GO terms with at least one marker, then take this chunk ----
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]

# ---- Cross-validation setup ----
cycles <- 10
# Validation folds stored by the GBLUP run. They are reused below, so no new
# random folds are drawn in this script.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Result containers: one entry per cycle; an entry stays an empty list when the
# corresponding fit fails.
T15_7_gblup_variances_all <- rep(list(list()), cycles)
T15_7_gblup_prediction_all <- rep(list(list()), cycles)
T15_7_gfblup_variances_all <- rep(list(list()), cycles)
T15_7_gfblup_prediction_all <- rep(list(list()), cycles)
T15_7_gfblup_validate_all <- rep(list(list()), cycles)

# The phenotype vector and design matrix are the same in every cycle, so they
# are built once instead of inside the loop.
y <- pheno_df_pla$T15_7
fm <- y ~ 1 * y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Rebuild this cycle's fold matrix from the stored GBLUP folds.
  # NOTE(review): byrow = TRUE only reproduces the stored folds if they were
  # flattened row-wise; confirm against the script that wrote the file.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T15_7...")

  # ---- GBLUP: GRM list holds only G ----
  # Both fits are collected first and stored only if neither failed. The
  # handler returns NULL instead of assigning, because assignments made inside
  # a handler function never reach the enclosing results.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T15_7_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T15_7_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one fit per GO term in this chunk ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # GRM list built from the k-th entries of GF_filtered and rGF_filtered.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    # On failure both slots keep their empty-list placeholder, so var and pred
    # never disagree about whether term k was fitted.
    if (!is.null(fit_k)) {
      var[[k]] <- fit_k[["var"]]
      pred[[k]] <- fit_k[["pred"]]
    }
  }
  T15_7_gfblup_variances_all[[r]] <- var
  T15_7_gfblup_prediction_all[[r]] <- pred
  T15_7_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Save results (relative to the working directory) ----
saveRDS(T15_7_gblup_variances_all, "T15_7_gblup_variances_all_1_1000.rds")
saveRDS(T15_7_gblup_prediction_all, "T15_7_gblup_prediction_all_1_1000.rds")
saveRDS(T15_7_gfblup_variances_all, "T15_7_gfblup_variances_all_1_1000.rds")
saveRDS(T15_7_gfblup_prediction_all, "T15_7_gfblup_prediction_all_1_1000.rds")
saveRDS(T15_7_gfblup_validate_all, "T15_7_gfblup_validate_all_1_1000.rds")
# ------------------------------------------------------------------------------
# ---- Patch header for the next script (retained as comments) ----
# diff --git a/code/using_GO/pla/T15_7_2001_3000.R b/code/using_GO/pla/T15_7_2001_3000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..4c7d97704efba722eccf37e10e01eacab50f6961
# --- /dev/null
# +++ b/code/using_GO/pla/T15_7_2001_3000.R
# @@ -0,0 +1,131 @@

# ---- Helper functions ----

# Strip leading and trailing whitespace from each element of `x`.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated `%in%`: TRUE where the left-hand value is absent from the right-hand set.
`%not_in%` <- purrr::negate(`%in%`)

# Report whether package `mypkg` appears in the installed-package table.
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}

# Working directory for this run; relative file names resolve against it.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Optional one-time installation of qgg from GitHub, left disabled:
# if (!is.installed("backports")) {
#   install.packages("backports")
# }
# if (!is.installed("devtools")) {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Input data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# ---- Remaining genotype / phenotype inputs ----
# load() restores whatever objects the .Rdata files contain under their saved
# names; readRDS() results are bound explicitly.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO-term inputs ----
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers

# ---- Filter: keep GO terms with at least one marker ----
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
# Restrict both lists to the GO-term chunk handled by this script.
GF_filtered <- GF_filtered[2001:3000]
rGF_filtered <- rGF_filtered[2001:3000]

# ---- Cross-validation setup ----
cycles <- 10
# Validation folds stored by the GBLUP run. They are reused below, so no new
# random folds are drawn in this script.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Result containers: one entry per cycle; an entry stays an empty list when the
# corresponding fit fails.
T15_7_gblup_variances_all <- rep(list(list()), cycles)
T15_7_gblup_prediction_all <- rep(list(list()), cycles)
T15_7_gfblup_variances_all <- rep(list(list()), cycles)
T15_7_gfblup_prediction_all <- rep(list(list()), cycles)
T15_7_gfblup_validate_all <- rep(list(list()), cycles)

# The phenotype vector and design matrix are the same in every cycle, so they
# are built once instead of inside the loop.
y <- pheno_df_pla$T15_7
fm <- y ~ 1 * y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Rebuild this cycle's fold matrix from the stored GBLUP folds.
  # NOTE(review): byrow = TRUE only reproduces the stored folds if they were
  # flattened row-wise; confirm against the script that wrote the file.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T15_7...")

  # ---- GBLUP: GRM list holds only G ----
  # Both fits are collected first and stored only if neither failed. The
  # handler returns NULL instead of assigning, because assignments made inside
  # a handler function never reach the enclosing results.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T15_7_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T15_7_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one fit per GO term in this chunk ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # GRM list built from the k-th entries of GF_filtered and rGF_filtered.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    # On failure both slots keep their empty-list placeholder, so var and pred
    # never disagree about whether term k was fitted.
    if (!is.null(fit_k)) {
      var[[k]] <- fit_k[["var"]]
      pred[[k]] <- fit_k[["pred"]]
    }
  }
  T15_7_gfblup_variances_all[[r]] <- var
  T15_7_gfblup_prediction_all[[r]] <- pred
  T15_7_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Save results (relative to the working directory) ----
saveRDS(T15_7_gblup_variances_all, "T15_7_gblup_variances_all_2001_3000.rds")
saveRDS(T15_7_gblup_prediction_all, "T15_7_gblup_prediction_all_2001_3000.rds")
saveRDS(T15_7_gfblup_variances_all, "T15_7_gfblup_variances_all_2001_3000.rds")
saveRDS(T15_7_gfblup_prediction_all, "T15_7_gfblup_prediction_all_2001_3000.rds")
saveRDS(T15_7_gfblup_validate_all, "T15_7_gfblup_validate_all_2001_3000.rds")

# ------------------------------------------------------------------------------
# ---- Patch header for the next script (retained as comments) ----
# diff --git a/code/using_GO/pla/T15_7_3001_4000.R b/code/using_GO/pla/T15_7_3001_4000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..49a5a815c10a2d6fd84303e303106c4d40d68de1
# --- /dev/null
# +++ b/code/using_GO/pla/T15_7_3001_4000.R
# @@ -0,0 +1,131 @@

# ---- Helper functions ----

# Strip leading and trailing whitespace from each element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negated `%in%`: TRUE where the left-hand value is absent from the right-hand set.
`%not_in%` <- purrr::negate(`%in%`)

# Report whether package `mypkg` appears in the installed-package table.
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
# ------------------------------------------------------------------------------
# Working directory for this run; relative file names resolve against it.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Optional one-time installation of qgg from GitHub, left disabled:
# if (!is.installed("backports")) {
#   install.packages("backports")
# }
# if (!is.installed("devtools")) {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Input data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO-term inputs ----
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
# ---- GO-term inputs restricted to terms with markers ----
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers

# ---- Filter: keep GO terms with at least one marker, then take this chunk ----
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]

# ---- Cross-validation setup ----
cycles <- 10
# Validation folds stored by the GBLUP run. They are reused below, so no new
# random folds are drawn in this script.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Result containers: one entry per cycle; an entry stays an empty list when the
# corresponding fit fails.
T15_7_gblup_variances_all <- rep(list(list()), cycles)
T15_7_gblup_prediction_all <- rep(list(list()), cycles)
T15_7_gfblup_variances_all <- rep(list(list()), cycles)
T15_7_gfblup_prediction_all <- rep(list(list()), cycles)
T15_7_gfblup_validate_all <- rep(list(list()), cycles)

# The phenotype vector and design matrix are the same in every cycle, so they
# are built once instead of inside the loop.
y <- pheno_df_pla$T15_7
fm <- y ~ 1 * y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Rebuild this cycle's fold matrix from the stored GBLUP folds.
  # NOTE(review): byrow = TRUE only reproduces the stored folds if they were
  # flattened row-wise; confirm against the script that wrote the file.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T15_7...")

  # ---- GBLUP: GRM list holds only G ----
  # Both fits are collected first and stored only if neither failed. The
  # handler returns NULL instead of assigning, because assignments made inside
  # a handler function never reach the enclosing results.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T15_7_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T15_7_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one fit per GO term in this chunk ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # GRM list built from the k-th entries of GF_filtered and rGF_filtered.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    # On failure both slots keep their empty-list placeholder, so var and pred
    # never disagree about whether term k was fitted.
    if (!is.null(fit_k)) {
      var[[k]] <- fit_k[["var"]]
      pred[[k]] <- fit_k[["pred"]]
    }
  }
  T15_7_gfblup_variances_all[[r]] <- var
  T15_7_gfblup_prediction_all[[r]] <- pred
  T15_7_gfblup_validate_all[[r]] <- list(validate)
}
# ---- Save results of the 3001-4000 run (relative to the working directory) ----
saveRDS(T15_7_gblup_variances_all, file = "T15_7_gblup_variances_all_3001_4000.rds")
saveRDS(T15_7_gblup_prediction_all, file = "T15_7_gblup_prediction_all_3001_4000.rds")
saveRDS(T15_7_gfblup_variances_all, file = "T15_7_gfblup_variances_all_3001_4000.rds")
saveRDS(T15_7_gfblup_prediction_all, file = "T15_7_gfblup_prediction_all_3001_4000.rds")
saveRDS(T15_7_gfblup_validate_all, file = "T15_7_gfblup_validate_all_3001_4000.rds")

# ------------------------------------------------------------------------------
# ---- Patch header for the next script (retained as comments) ----
# diff --git a/code/using_GO/pla/T15_7_4001_5000.R b/code/using_GO/pla/T15_7_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..824c87897829137f7c2221aca2dbf5e915141a29
# --- /dev/null
# +++ b/code/using_GO/pla/T15_7_4001_5000.R
# @@ -0,0 +1,131 @@

# ---- Helper functions ----

# Strip leading and trailing whitespace from each element of `x`.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated `%in%`: TRUE where the left-hand value is absent from the right-hand set.
`%not_in%` <- purrr::negate(`%in%`)

# Report whether package `mypkg` appears in the installed-package table.
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}

# Working directory for this run; relative file names resolve against it.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Optional one-time installation of qgg from GitHub, left disabled:
# if (!is.installed("backports")) {
#   install.packages("backports")
# }
# if (!is.installed("devtools")) {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Input data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
# ---- Remaining inputs ----
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under their saved names; G and pheno_df_pla, used
# below, are expected to come from these .Rdata files.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
# The objects below hold 7297 entries: GO terms with zero markers are excluded.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter ----
# Keep GO terms with at least one marker, then take this script's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]

# ---- Cross-validation setup ----
cycles <- 10
# Validation sets are read from the stored GBLUP run instead of being redrawn.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle, pre-filled with an empty list so that a failed fit leaves
# a recognisable placeholder.
T15_7_gblup_variances_all <- rep(list(list()), cycles)
T15_7_gblup_prediction_all <- rep(list(list()), cycles)
T15_7_gfblup_variances_all <- rep(list(list()), cycles)
T15_7_gfblup_prediction_all <- rep(list(list()), cycles)
T15_7_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Entry r of the stored validation sets is flattened and reshaped by row into
  # round(nrow(pheno_df_pla) / n_folds) rows. The former time-seeded fold
  # sampling was never used and has been removed.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message("T15_7...")
  y <- pheno_df_pla$T15_7
  # Intercept-only design matrix. The earlier formula `y ~ 1*y` also named the
  # response on the right-hand side, which model.matrix() drops with a warning.
  X <- model.matrix(y ~ 1)

  # ---- GBLUP ----
  # Both fits must succeed before either slot is filled; on error the slots
  # keep their empty list() and the failure is reported instead of hidden.
  tryCatch(
    {
      gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      gblup_pred <- greml(
        y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1
      )
      T15_7_gblup_variances_all[[r]] <- gblup_var
      T15_7_gblup_prediction_all[[r]] <- gblup_pred
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    }
  )

  # ---- GFBLUP ----
  # One model per GO term: entry k of GF_filtered together with entry k of
  # rGF_filtered is passed as the GRM list.
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch(
      {
        gf_var[[k]] <- greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1
        )
        gf_pred[[k]] <- greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]),
          validate = validate, ncores = 1
        )
      },
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
      }
    )
  }
  T15_7_gfblup_variances_all[[r]] <- gf_var
  T15_7_gfblup_prediction_all[[r]] <- gf_pred
  T15_7_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Save results ----
saveRDS(T15_7_gblup_variances_all, "T15_7_gblup_variances_all_4001_5000.rds")
saveRDS(T15_7_gblup_prediction_all, "T15_7_gblup_prediction_all_4001_5000.rds")
saveRDS(T15_7_gfblup_variances_all, "T15_7_gfblup_variances_all_4001_5000.rds")
saveRDS(T15_7_gfblup_prediction_all, "T15_7_gfblup_prediction_all_4001_5000.rds")
saveRDS(T15_7_gfblup_validate_all, "T15_7_gfblup_validate_all_4001_5000.rds")

# ---- End of script ----
# diff --git a/code/using_GO/pla/T15_7_5001_6000.R b/code/using_GO/pla/T15_7_5001_6000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..92c45f8e0c2cd6c759b80bc179d845dcf53e1a13
# --- /dev/null
# +++ b/code/using_GO/pla/T15_7_5001_6000.R
# @@ -0,0 +1,131 @@
# ---- Functions ----
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%: TRUE where a left-hand value is absent from the right-hand set.
# Base Negate() is used so that purrr (never attached here) is not required.
`%not_in%` <- Negate(`%in%`)
# Return, for each package name in mypkg, whether that package is installed.
# system.file() is queried per package because installed.packages() scans every
# installed package and its documentation advises against using it for this.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}

# Relative output file names used by this script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-off package installation, kept disabled.
# if (!is.installed("backports"))
# {
#   install.packages("backports")
# }
# if (!is.installed("devtools"))
# {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg"))
# {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under the names they were saved with.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
# ---- Remaining inputs ----
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# Per-GO-term tables; the original notes give 7432 entries for these three.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# The original notes give 7297 entries for the objects below, i.e. after
# excluding GO terms with zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter ----
# Select GO terms that have at least one marker, then this script's slice.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)][5001:6000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][5001:6000]

# ---- Cross-validation setup ----
# Number of repetitions iterated by the main loop.
cycles <- 10
# Validation sets are read from the stored GBLUP run instead of being redrawn.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle, pre-filled with an empty list so that a failed fit leaves
# a recognisable placeholder.
T15_7_gblup_variances_all <- rep(list(list()), cycles)
T15_7_gblup_prediction_all <- rep(list(list()), cycles)
T15_7_gfblup_variances_all <- rep(list(list()), cycles)
T15_7_gfblup_prediction_all <- rep(list(list()), cycles)
T15_7_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Entry r of the stored validation sets is flattened and reshaped by row into
  # round(nrow(pheno_df_pla) / n_folds) rows. The former time-seeded fold
  # sampling was never used and has been removed.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message("T15_7...")
  y <- pheno_df_pla$T15_7
  # Intercept-only design matrix. The earlier formula `y ~ 1*y` also named the
  # response on the right-hand side, which model.matrix() drops with a warning.
  X <- model.matrix(y ~ 1)

  # ---- GBLUP ----
  # Both fits must succeed before either slot is filled; on error the slots
  # keep their empty list() and the failure is reported instead of hidden.
  tryCatch(
    {
      gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      gblup_pred <- greml(
        y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1
      )
      T15_7_gblup_variances_all[[r]] <- gblup_var
      T15_7_gblup_prediction_all[[r]] <- gblup_pred
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    }
  )

  # ---- GFBLUP ----
  # One model per GO term: entry k of GF_filtered together with entry k of
  # rGF_filtered is passed as the GRM list.
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch(
      {
        gf_var[[k]] <- greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1
        )
        gf_pred[[k]] <- greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]),
          validate = validate, ncores = 1
        )
      },
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
      }
    )
  }
  T15_7_gfblup_variances_all[[r]] <- gf_var
  T15_7_gfblup_prediction_all[[r]] <- gf_pred
  T15_7_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Save results ----
saveRDS(T15_7_gblup_variances_all, "T15_7_gblup_variances_all_5001_6000.rds")
saveRDS(T15_7_gblup_prediction_all, "T15_7_gblup_prediction_all_5001_6000.rds")
saveRDS(T15_7_gfblup_variances_all, "T15_7_gfblup_variances_all_5001_6000.rds")
saveRDS(T15_7_gfblup_prediction_all, "T15_7_gfblup_prediction_all_5001_6000.rds")
saveRDS(T15_7_gfblup_validate_all, "T15_7_gfblup_validate_all_5001_6000.rds")

# ---- End of script ----
# diff --git a/code/using_GO/pla/T15_7_6001_7297.R b/code/using_GO/pla/T15_7_6001_7297.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..58ee2a30618b88e1f44aba51e1ce0347e8229dae
# --- /dev/null
# +++ b/code/using_GO/pla/T15_7_6001_7297.R
# @@ -0,0 +1,131 @@
# ---- Functions ----
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%: TRUE where a left-hand value is absent from the right-hand set.
# Base Negate() is used so that purrr (never attached here) is not required.
`%not_in%` <- Negate(`%in%`)
# Return, for each package name in mypkg, whether that package is installed.
# system.file() is queried per package because installed.packages() scans every
# installed package and its documentation advises against using it for this.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}

# Relative output file names used by this script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-off package installation, kept disabled.
# if (!is.installed("backports"))
# {
#   install.packages("backports")
# }
# if (!is.installed("devtools"))
# {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg"))
# {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under the names they were saved with.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
# (rsetsGF, read on the preceding line: 7297 entries, excluding those GO terms
# with zero markers; the same note applies to the four objects below.)
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter ----
# Keep GO terms with at least one marker, then take this script's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[6001:7297]
rGF_filtered <- rGF_filtered[6001:7297]

# ---- Cross-validation setup ----
cycles <- 10
# Validation sets are read from the stored GBLUP run instead of being redrawn.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle, pre-filled with an empty list so that a failed fit leaves
# a recognisable placeholder.
T15_7_gblup_variances_all <- rep(list(list()), cycles)
T15_7_gblup_prediction_all <- rep(list(list()), cycles)
T15_7_gfblup_variances_all <- rep(list(list()), cycles)
T15_7_gfblup_prediction_all <- rep(list(list()), cycles)
T15_7_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Entry r of the stored validation sets is flattened and reshaped by row into
  # round(nrow(pheno_df_pla) / n_folds) rows. The former time-seeded fold
  # sampling was never used and has been removed.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message("T15_7...")
  y <- pheno_df_pla$T15_7
  # Intercept-only design matrix. The earlier formula `y ~ 1*y` also named the
  # response on the right-hand side, which model.matrix() drops with a warning.
  X <- model.matrix(y ~ 1)

  # ---- GBLUP ----
  # Both fits must succeed before either slot is filled; on error the slots
  # keep their empty list() and the failure is reported instead of hidden.
  tryCatch(
    {
      gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      gblup_pred <- greml(
        y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1
      )
      T15_7_gblup_variances_all[[r]] <- gblup_var
      T15_7_gblup_prediction_all[[r]] <- gblup_pred
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    }
  )

  # ---- GFBLUP ----
  # One model per GO term: entry k of GF_filtered together with entry k of
  # rGF_filtered is passed as the GRM list.
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch(
      {
        gf_var[[k]] <- greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1
        )
        gf_pred[[k]] <- greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]),
          validate = validate, ncores = 1
        )
      },
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
      }
    )
  }
  T15_7_gfblup_variances_all[[r]] <- gf_var
  T15_7_gfblup_prediction_all[[r]] <- gf_pred
  T15_7_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Save results ----
saveRDS(T15_7_gblup_variances_all, "T15_7_gblup_variances_all_6001_7297.rds")
saveRDS(T15_7_gblup_prediction_all, "T15_7_gblup_prediction_all_6001_7297.rds")
saveRDS(T15_7_gfblup_variances_all, "T15_7_gfblup_variances_all_6001_7297.rds")
saveRDS(T15_7_gfblup_prediction_all, "T15_7_gfblup_prediction_all_6001_7297.rds")
saveRDS(T15_7_gfblup_validate_all, "T15_7_gfblup_validate_all_6001_7297.rds")

# ---- End of script ----
# diff --git a/code/using_GO/pla/T15_8_1001_2000.R b/code/using_GO/pla/T15_8_1001_2000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..a903e504739161b8a10284ea2870e9489dbabc1b
# --- /dev/null
# +++ b/code/using_GO/pla/T15_8_1001_2000.R
# @@ -0,0 +1,131 @@
# ---- Functions ----
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%: TRUE where a left-hand value is absent from the right-hand set.
# Base Negate() is used so that purrr (never attached here) is not required.
`%not_in%` <- Negate(`%in%`)
# Return, for each package name in mypkg, whether that package is installed.
# system.file() is queried per package because installed.packages() scans every
# installed package and its documentation advises against using it for this.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}

# Relative output file names used by this script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-off package installation, kept disabled.
# if (!is.installed("backports"))
# {
#   install.packages("backports")
# }
# if (!is.installed("devtools"))
# {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg"))
# {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
# ---- Remaining inputs ----
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under the names they were saved with.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# Per-GO-term tables; the original notes give 7432 entries for these three.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# The original notes give 7297 entries for the objects below, i.e. after
# excluding GO terms with zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
# ---- Filter ----
# Keep GO terms with at least one marker, then take this script's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1001:2000]
rGF_filtered <- rGF_filtered[1001:2000]

# ---- Cross-validation setup ----
cycles <- 10
# Validation sets are read from the stored GBLUP run instead of being redrawn.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle, pre-filled with an empty list so that a failed fit leaves
# a recognisable placeholder.
T15_8_gblup_variances_all <- rep(list(list()), cycles)
T15_8_gblup_prediction_all <- rep(list(list()), cycles)
T15_8_gfblup_variances_all <- rep(list(list()), cycles)
T15_8_gfblup_prediction_all <- rep(list(list()), cycles)
T15_8_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Entry r of the stored validation sets is flattened and reshaped by row into
  # round(nrow(pheno_df_pla) / n_folds) rows. The former time-seeded fold
  # sampling was never used and has been removed.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message("T15_8...")
  y <- pheno_df_pla$T15_8
  # Intercept-only design matrix. The earlier formula `y ~ 1*y` also named the
  # response on the right-hand side, which model.matrix() drops with a warning.
  X <- model.matrix(y ~ 1)

  # ---- GBLUP ----
  # Both fits must succeed before either slot is filled; on error the slots
  # keep their empty list() and the failure is reported instead of hidden.
  tryCatch(
    {
      gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      gblup_pred <- greml(
        y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1
      )
      T15_8_gblup_variances_all[[r]] <- gblup_var
      T15_8_gblup_prediction_all[[r]] <- gblup_pred
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    }
  )

  # ---- GFBLUP ----
  # One model per GO term: entry k of GF_filtered together with entry k of
  # rGF_filtered is passed as the GRM list.
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch(
      {
        gf_var[[k]] <- greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1
        )
        gf_pred[[k]] <- greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]),
          validate = validate, ncores = 1
        )
      },
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
      }
    )
  }
  T15_8_gfblup_variances_all[[r]] <- gf_var
  T15_8_gfblup_prediction_all[[r]] <- gf_pred
  T15_8_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Save results ----
saveRDS(T15_8_gblup_variances_all, "T15_8_gblup_variances_all_1001_2000.rds")
saveRDS(T15_8_gblup_prediction_all, "T15_8_gblup_prediction_all_1001_2000.rds")
saveRDS(T15_8_gfblup_variances_all, "T15_8_gfblup_variances_all_1001_2000.rds")
saveRDS(T15_8_gfblup_prediction_all, "T15_8_gfblup_prediction_all_1001_2000.rds")
saveRDS(T15_8_gfblup_validate_all, "T15_8_gfblup_validate_all_1001_2000.rds")

# ---- End of script ----
# diff --git a/code/using_GO/pla/T15_8_1_1000.R b/code/using_GO/pla/T15_8_1_1000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..29b7426b0a68ec9d31bbc0b4518907a6833dfbb0
# --- /dev/null
# +++ b/code/using_GO/pla/T15_8_1_1000.R
# @@ -0,0 +1,131 @@
# ---- Functions ----
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%: TRUE where a left-hand value is absent from the right-hand set.
# Base Negate() is used so that purrr (never attached here) is not required.
`%not_in%` <- Negate(`%in%`)
# Return, for each package name in mypkg, whether that package is installed.
# system.file() is queried per package because installed.packages() scans every
# installed package and its documentation advises against using it for this.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}

# Relative output file names used by this script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-off package installation, kept disabled.
# if (!is.installed("backports"))
# {
#   install.packages("backports")
# }
# if (!is.installed("devtools"))
# {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg"))
# {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under the names they were saved with.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
# ---- Remaining inputs ----
# The original notes give 7432 entries for these two tables.
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# The original notes give 7297 entries for the objects below, i.e. after
# excluding GO terms with zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter ----
# Select GO terms that have at least one marker, then this script's slice.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)][1:1000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][1:1000]

# ---- Cross-validation setup ----
# Number of repetitions iterated by the main loop.
cycles <- 10
# Stored validation sets from the GBLUP run; indexed per cycle in the main loop.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One empty-list slot per cycle.
T15_8_gblup_variances_all <- rep(list(list()), cycles)
# One slot per cycle, pre-filled with an empty list so that a failed fit leaves
# a recognisable placeholder.
T15_8_gblup_prediction_all <- rep(list(list()), cycles)
T15_8_gfblup_variances_all <- rep(list(list()), cycles)
T15_8_gfblup_prediction_all <- rep(list(list()), cycles)
T15_8_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Entry r of the stored validation sets is flattened and reshaped by row into
  # round(nrow(pheno_df_pla) / n_folds) rows. The former time-seeded fold
  # sampling was never used and has been removed.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message("T15_8...")
  y <- pheno_df_pla$T15_8
  # Intercept-only design matrix. The earlier formula `y ~ 1*y` also named the
  # response on the right-hand side, which model.matrix() drops with a warning.
  X <- model.matrix(y ~ 1)

  # ---- GBLUP ----
  # Both fits must succeed before either slot is filled; on error the slots
  # keep their empty list() and the failure is reported instead of hidden.
  tryCatch(
    {
      gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      gblup_pred <- greml(
        y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1
      )
      T15_8_gblup_variances_all[[r]] <- gblup_var
      T15_8_gblup_prediction_all[[r]] <- gblup_pred
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    }
  )

  # ---- GFBLUP ----
  # One model per GO term: entry k of GF_filtered together with entry k of
  # rGF_filtered is passed as the GRM list.
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch(
      {
        gf_var[[k]] <- greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1
        )
        gf_pred[[k]] <- greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]),
          validate = validate, ncores = 1
        )
      },
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
      }
    )
  }
  T15_8_gfblup_variances_all[[r]] <- gf_var
  T15_8_gfblup_prediction_all[[r]] <- gf_pred
  T15_8_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Save results ----
saveRDS(T15_8_gblup_variances_all, "T15_8_gblup_variances_all_1_1000.rds")
saveRDS(T15_8_gblup_prediction_all, "T15_8_gblup_prediction_all_1_1000.rds")
saveRDS(T15_8_gfblup_variances_all, "T15_8_gfblup_variances_all_1_1000.rds")
saveRDS(T15_8_gfblup_prediction_all, "T15_8_gfblup_prediction_all_1_1000.rds")
saveRDS(T15_8_gfblup_validate_all, "T15_8_gfblup_validate_all_1_1000.rds")

# ---- End of script ----
# diff --git a/code/using_GO/pla/T15_8_2001_3000.R b/code/using_GO/pla/T15_8_2001_3000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..8e9ff544a3a1c8babec026ea0693e4b4ee28d26a
# --- /dev/null
# +++ b/code/using_GO/pla/T15_8_2001_3000.R
# @@ -0,0 +1,131 @@
# ---- Functions ----
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%: TRUE where a left-hand value is absent from the right-hand set.
# Base Negate() is used so that purrr (never attached here) is not required.
`%not_in%` <- Negate(`%in%`)
# Return, for each package name in mypkg, whether that package is installed.
# system.file() is queried per package because installed.packages() scans every
# installed package and its documentation advises against using it for this.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}

# Relative output file names used by this script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-off package installation, kept disabled.
# if (!is.installed("backports"))
# {
#   install.packages("backports")
# }
# if (!is.installed("devtools"))
# {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg"))
# {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")
+########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers 
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_8_gblup_variances_all=rep(list(list()),cycles) +T15_8_gblup_prediction_all=rep(list(list()),cycles) +T15_8_gfblup_variances_all=rep(list(list()),cycles) +T15_8_gfblup_prediction_all=rep(list(list()),cycles) +T15_8_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_8...") + y=pheno_df_pla$T15_8 + fm <- y ~ 1*y + 
X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_8_gblup_variances_all[[r]]<-var + T15_8_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_8_gblup_variances_all[[r]]<-list() + T15_8_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_8_gfblup_variances_all[[r]]<-var + T15_8_gfblup_prediction_all[[r]]<-pred + T15_8_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_8_gblup_variances_all,"T15_8_gblup_variances_all_2001_3000.rds") +saveRDS(T15_8_gblup_prediction_all,"T15_8_gblup_prediction_all_2001_3000.rds") +saveRDS(T15_8_gfblup_variances_all,"T15_8_gfblup_variances_all_2001_3000.rds") +saveRDS(T15_8_gfblup_prediction_all,"T15_8_gfblup_prediction_all_2001_3000.rds") +saveRDS(T15_8_gfblup_validate_all,"T15_8_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_8_3001_4000.R 
b/code/using_GO/pla/T15_8_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..098466ead9c5735af647730c40d5d1a5b6dea627 --- /dev/null +++ b/code/using_GO/pla/T15_8_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] + 
+########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_8_gblup_variances_all=rep(list(list()),cycles) +T15_8_gblup_prediction_all=rep(list(list()),cycles) +T15_8_gfblup_variances_all=rep(list(list()),cycles) +T15_8_gfblup_prediction_all=rep(list(list()),cycles) +T15_8_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_8...") + y=pheno_df_pla$T15_8 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_8_gblup_variances_all[[r]]<-var + T15_8_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_8_gblup_variances_all[[r]]<-list() + T15_8_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { 
+ message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_8_gfblup_variances_all[[r]]<-var + T15_8_gfblup_prediction_all[[r]]<-pred + T15_8_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_8_gblup_variances_all,"T15_8_gblup_variances_all_3001_4000.rds") +saveRDS(T15_8_gblup_prediction_all,"T15_8_gblup_prediction_all_3001_4000.rds") +saveRDS(T15_8_gfblup_variances_all,"T15_8_gfblup_variances_all_3001_4000.rds") +saveRDS(T15_8_gfblup_prediction_all,"T15_8_gfblup_prediction_all_3001_4000.rds") +saveRDS(T15_8_gfblup_validate_all,"T15_8_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_8_4001_5000.R b/code/using_GO/pla/T15_8_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..8c0f44c474ec1c026b35e94875e4b79e44feb1d6 --- /dev/null +++ b/code/using_GO/pla/T15_8_4001_5000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if 
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers 
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_8_gblup_variances_all=rep(list(list()),cycles) +T15_8_gblup_prediction_all=rep(list(list()),cycles) +T15_8_gfblup_variances_all=rep(list(list()),cycles) +T15_8_gfblup_prediction_all=rep(list(list()),cycles) +T15_8_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = 
nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_8...") + y=pheno_df_pla$T15_8 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_8_gblup_variances_all[[r]]<-var + T15_8_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_8_gblup_variances_all[[r]]<-list() + T15_8_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_8_gfblup_variances_all[[r]]<-var + T15_8_gfblup_prediction_all[[r]]<-pred + T15_8_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_8_gblup_variances_all,"T15_8_gblup_variances_all_4001_5000.rds") +saveRDS(T15_8_gblup_prediction_all,"T15_8_gblup_prediction_all_4001_5000.rds") 
+saveRDS(T15_8_gfblup_variances_all,"T15_8_gfblup_variances_all_4001_5000.rds") +saveRDS(T15_8_gfblup_prediction_all,"T15_8_gfblup_prediction_all_4001_5000.rds") +saveRDS(T15_8_gfblup_validate_all,"T15_8_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_8_5001_6000.R b/code/using_GO/pla/T15_8_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..6b7141f554583a51710e692ab3028e77ad221036 --- /dev/null +++ b/code/using_GO/pla/T15_8_5001_6000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") 
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") 
#7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_8_gblup_variances_all=rep(list(list()),cycles) +T15_8_gblup_prediction_all=rep(list(list()),cycles) +T15_8_gfblup_variances_all=rep(list(list()),cycles) +T15_8_gfblup_prediction_all=rep(list(list()),cycles) +T15_8_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_8...") + y=pheno_df_pla$T15_8 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_8_gblup_variances_all[[r]]<-var + 
T15_8_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_8_gblup_variances_all[[r]]<-list() + T15_8_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_8_gfblup_variances_all[[r]]<-var + T15_8_gfblup_prediction_all[[r]]<-pred + T15_8_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_8_gblup_variances_all,"T15_8_gblup_variances_all_5001_6000.rds") +saveRDS(T15_8_gblup_prediction_all,"T15_8_gblup_prediction_all_5001_6000.rds") +saveRDS(T15_8_gfblup_variances_all,"T15_8_gfblup_variances_all_5001_6000.rds") +saveRDS(T15_8_gfblup_prediction_all,"T15_8_gfblup_prediction_all_5001_6000.rds") +saveRDS(T15_8_gfblup_validate_all,"T15_8_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T15_8_6001_7297.R b/code/using_GO/pla/T15_8_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..94a92dfbfefef7d1b8071c00aa4a26ead26c6f4c --- /dev/null +++ b/code/using_GO/pla/T15_8_6001_7297.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- 
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") 
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[6001:7297] +rGF_filtered<-rGF_filtered[6001:7297] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + 
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T15_8_gblup_variances_all=rep(list(list()),cycles) +T15_8_gblup_prediction_all=rep(list(list()),cycles) +T15_8_gfblup_variances_all=rep(list(list()),cycles) +T15_8_gfblup_prediction_all=rep(list(list()),cycles) +T15_8_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T15_8...") + y=pheno_df_pla$T15_8 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T15_8_gblup_variances_all[[r]]<-var + T15_8_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T15_8_gblup_variances_all[[r]]<-list() + T15_8_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T15_8_gfblup_variances_all[[r]]<-var + T15_8_gfblup_prediction_all[[r]]<-pred + T15_8_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T15_8_gblup_variances_all,"T15_8_gblup_variances_all_6001_7297.rds") +saveRDS(T15_8_gblup_prediction_all,"T15_8_gblup_prediction_all_6001_7297.rds") +saveRDS(T15_8_gfblup_variances_all,"T15_8_gfblup_variances_all_6001_7297.rds") +saveRDS(T15_8_gfblup_prediction_all,"T15_8_gfblup_prediction_all_6001_7297.rds") +saveRDS(T15_8_gfblup_validate_all,"T15_8_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_1_1001_2000.R b/code/using_GO/pla/T16_1_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..15c58c9d2647d1d12188a3f4fa2f61be5101c3e4 --- /dev/null +++ b/code/using_GO/pla/T16_1_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# 
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
#7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T16_1_gblup_variances_all=rep(list(list()),cycles) +T16_1_gblup_prediction_all=rep(list(list()),cycles) +T16_1_gfblup_variances_all=rep(list(list()),cycles) +T16_1_gfblup_prediction_all=rep(list(list()),cycles) +T16_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + 
#validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T16_1...") + y=pheno_df_pla$T16_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T16_1_gblup_variances_all[[r]]<-var + T16_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T16_1_gblup_variances_all[[r]]<-list() + T16_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T16_1_gfblup_variances_all[[r]]<-var + T16_1_gfblup_prediction_all[[r]]<-pred + T16_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T16_1_gblup_variances_all,"T16_1_gblup_variances_all_1001_2000.rds") +saveRDS(T16_1_gblup_prediction_all,"T16_1_gblup_prediction_all_1001_2000.rds") +saveRDS(T16_1_gfblup_variances_all,"T16_1_gfblup_variances_all_1001_2000.rds") +saveRDS(T16_1_gfblup_prediction_all,"T16_1_gfblup_prediction_all_1001_2000.rds") 
+saveRDS(T16_1_gfblup_validate_all,"T16_1_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_1_1_1000.R b/code/using_GO/pla/T16_1_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..5859c7dd44dfe82d0d194d0eb24c84049fb606aa --- /dev/null +++ b/code/using_GO/pla/T16_1_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") 
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) 
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T16_1_gblup_variances_all=rep(list(list()),cycles) +T16_1_gblup_prediction_all=rep(list(list()),cycles) +T16_1_gfblup_variances_all=rep(list(list()),cycles) +T16_1_gfblup_prediction_all=rep(list(list()),cycles) +T16_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T16_1...") + y=pheno_df_pla$T16_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T16_1_gblup_variances_all[[r]]<-var + T16_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T16_1_gblup_variances_all[[r]]<-list() + T16_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T16_1_gfblup_variances_all[[r]]<-var + T16_1_gfblup_prediction_all[[r]]<-pred + T16_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T16_1_gblup_variances_all,"T16_1_gblup_variances_all_1_1000.rds") +saveRDS(T16_1_gblup_prediction_all,"T16_1_gblup_prediction_all_1_1000.rds") +saveRDS(T16_1_gfblup_variances_all,"T16_1_gfblup_variances_all_1_1000.rds") +saveRDS(T16_1_gfblup_prediction_all,"T16_1_gfblup_prediction_all_1_1000.rds") +saveRDS(T16_1_gfblup_validate_all,"T16_1_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_1_2001_3000.R b/code/using_GO/pla/T16_1_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..b98a17a41e5efa940660a37f6fe9d44c7156cf38 --- /dev/null +++ b/code/using_GO/pla/T16_1_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, 
installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T16_1_gblup_variances_all=rep(list(list()),cycles) +T16_1_gblup_prediction_all=rep(list(list()),cycles) +T16_1_gfblup_variances_all=rep(list(list()),cycles) 
+T16_1_gfblup_prediction_all=rep(list(list()),cycles) +T16_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T16_1...") + y=pheno_df_pla$T16_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T16_1_gblup_variances_all[[r]]<-var + T16_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T16_1_gblup_variances_all[[r]]<-list() + T16_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T16_1_gfblup_variances_all[[r]]<-var + T16_1_gfblup_prediction_all[[r]]<-pred + 
T16_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T16_1_gblup_variances_all,"T16_1_gblup_variances_all_2001_3000.rds") +saveRDS(T16_1_gblup_prediction_all,"T16_1_gblup_prediction_all_2001_3000.rds") +saveRDS(T16_1_gfblup_variances_all,"T16_1_gfblup_variances_all_2001_3000.rds") +saveRDS(T16_1_gfblup_prediction_all,"T16_1_gfblup_prediction_all_2001_3000.rds") +saveRDS(T16_1_gfblup_validate_all,"T16_1_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_1_3001_4000.R b/code/using_GO/pla/T16_1_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..d50b02638501e82c055182be14a573d95122f588 --- /dev/null +++ b/code/using_GO/pla/T16_1_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") 
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero 
markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T16_1_gblup_variances_all=rep(list(list()),cycles) +T16_1_gblup_prediction_all=rep(list(list()),cycles) +T16_1_gfblup_variances_all=rep(list(list()),cycles) +T16_1_gfblup_prediction_all=rep(list(list()),cycles) +T16_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T16_1...") + y=pheno_df_pla$T16_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T16_1_gblup_variances_all[[r]]<-var + T16_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T16_1_gblup_variances_all[[r]]<-list() + T16_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T16_1_gfblup_variances_all[[r]]<-var + T16_1_gfblup_prediction_all[[r]]<-pred + T16_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T16_1_gblup_variances_all,"T16_1_gblup_variances_all_3001_4000.rds") +saveRDS(T16_1_gblup_prediction_all,"T16_1_gblup_prediction_all_3001_4000.rds") +saveRDS(T16_1_gfblup_variances_all,"T16_1_gfblup_variances_all_3001_4000.rds") +saveRDS(T16_1_gfblup_prediction_all,"T16_1_gfblup_prediction_all_3001_4000.rds") +saveRDS(T16_1_gfblup_validate_all,"T16_1_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_1_4001_5000.R b/code/using_GO/pla/T16_1_4001_5000.R new 
file mode 100644 index 0000000000000000000000000000000000000000..9b96548df2dede9329fa57e4eaac8cd770bd83cb --- /dev/null +++ b/code/using_GO/pla/T16_1_4001_5000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
# ---- Phenotype / genotype workspaces ----------------------------------------
# load() restores the objects stored in each .Rdata file under their saved
# names (presumably including pheno_df_pla, used further down -- TODO confirm).
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO-term inputs ---------------------------------------------------------
# Author's size notes: 7432 = all GO terms; 7297 = GO terms left after
# excluding those with zero markers.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")     # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")               # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")         # 7297
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")       # 7297
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")           # 7297
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")           # 7297
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")                 # 7297
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")               # 7297

# ---- Filter -----------------------------------------------------------------
# Keep GO terms with at least one marker, select the matching entries of GF and
# rGF by name, then take this script's batch: positions 4001 to 5000.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
n <- length(go_all_genes_number[go_all_markers_number > 0])
GF_filtered <- GF[names(go_all_markers_filtered)][4001:5000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][4001:5000]

+########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T16_1_gblup_variances_all=rep(list(list()),cycles) +T16_1_gblup_prediction_all=rep(list(list()),cycles) +T16_1_gfblup_variances_all=rep(list(list()),cycles) +T16_1_gfblup_prediction_all=rep(list(list()),cycles) +T16_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T16_1...") + y=pheno_df_pla$T16_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T16_1_gblup_variances_all[[r]]<-var + T16_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T16_1_gblup_variances_all[[r]]<-list() + T16_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { 
+ message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T16_1_gfblup_variances_all[[r]]<-var + T16_1_gfblup_prediction_all[[r]]<-pred + T16_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T16_1_gblup_variances_all,"T16_1_gblup_variances_all_4001_5000.rds") +saveRDS(T16_1_gblup_prediction_all,"T16_1_gblup_prediction_all_4001_5000.rds") +saveRDS(T16_1_gfblup_variances_all,"T16_1_gfblup_variances_all_4001_5000.rds") +saveRDS(T16_1_gfblup_prediction_all,"T16_1_gfblup_prediction_all_4001_5000.rds") +saveRDS(T16_1_gfblup_validate_all,"T16_1_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_1_5001_6000.R b/code/using_GO/pla/T16_1_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..8e3db7d393686c871b8be08639e5b47dd21dc532 --- /dev/null +++ b/code/using_GO/pla/T16_1_5001_6000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if 
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers 
# ---- GO feature inputs ------------------------------------------------------
# Author's size note for every file below: 7297 GO terms, i.e. terms with zero
# markers are already excluded.
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter -----------------------------------------------------------------
# Keep GO terms that have at least one marker and select the matching entries
# of GF / rGF by name.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
# This script processes one batch only: positions 5001 to 6000.
GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]

# ---- Cross-validation setup -------------------------------------------------
cycles <- 10
# Validation sets are read from the earlier GBLUP run instead of being sampled
# here, one entry per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8
# Fail before any model is fitted if a cycle has no stored validation sets.
stopifnot(length(gblup_validate) >= cycles)

# One slot per cycle; a slot stays an empty list when that cycle's fit fails.
T16_1_gblup_variances_all <- rep(list(list()), cycles)
T16_1_gblup_prediction_all <- rep(list(list()), cycles)
T16_1_gfblup_variances_all <- rep(list(list()), cycles)
T16_1_gfblup_prediction_all <- rep(list(list()), cycles)
T16_1_gfblup_validate_all <- rep(list(list()), cycles)

# The phenotype and design matrix do not depend on the cycle, so they are
# built once instead of on every iteration.
y <- pheno_df_pla$T16_1
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): if gblup_validate[[r]] wraps a fold matrix, unlist() yields
  # it column by column and byrow = TRUE regroups the indices into different
  # columns than the stored ones -- confirm this is intended.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T16_1...")

  # ---- GBLUP ----------------------------------------------------------------
  # Both fits are returned as a value and stored only when both succeed, so
  # the two result lists can never disagree about a cycle.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(y = y, X = X, GRM = list(G = G),
                         validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T16_1_gblup_variances_all[[r]] <- gblup_fit$variances
    T16_1_gblup_prediction_all[[r]] <- gblup_fit$prediction
  }

  # ---- GFBLUP ---------------------------------------------------------------
  gf_variances <- rep(list(list()), length(GF_filtered))
  gf_predictions <- rep(list(list()), length(GF_filtered))
  names(gf_variances) <- names(GF_filtered)
  names(gf_predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the k-th entry of GF_filtered and
    # the k-th entry of rGF_filtered.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # The handler returns the fallback instead of assigning it: an assignment
    # inside function(e) only changes a copy local to the handler. On any
    # failure both entries for this GO term are left empty.
    gf_fit <- tryCatch(
      list(
        variances = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        prediction = greml(y = y, X = X, GRM = grm_pair,
                           validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " (", names(GF_filtered)[k],
                ") in cycle ", r, ": ", conditionMessage(e))
        list(variances = list(), prediction = list())
      }
    )
    gf_variances[[k]] <- gf_fit$variances
    gf_predictions[[k]] <- gf_fit$prediction
  }
  T16_1_gfblup_variances_all[[r]] <- gf_variances
  T16_1_gfblup_prediction_all[[r]] <- gf_predictions
  T16_1_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T16_1_gblup_variances_all, "T16_1_gblup_variances_all_5001_6000.rds")
saveRDS(T16_1_gblup_prediction_all, "T16_1_gblup_prediction_all_5001_6000.rds")
+saveRDS(T16_1_gfblup_variances_all,"T16_1_gfblup_variances_all_5001_6000.rds") +saveRDS(T16_1_gfblup_prediction_all,"T16_1_gfblup_prediction_all_5001_6000.rds") +saveRDS(T16_1_gfblup_validate_all,"T16_1_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_1_6001_7297.R b/code/using_GO/pla/T16_1_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..136d48f7f1772b84f96a9ef5e6036cf418958463 --- /dev/null +++ b/code/using_GO/pla/T16_1_6001_7297.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") 
# ---- Marker / gene level inputs ---------------------------------------------
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores the saved objects under their stored names (presumably G,
# which the GBLUP model uses -- TODO confirm).
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")

# ---- Phenotype / genotype workspaces ----------------------------------------
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO-term inputs ---------------------------------------------------------
# Author's size notes: 7432 = all GO terms; 7297 = GO terms left after
# excluding those with zero markers.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")     # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")               # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")         # 7297
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")       # 7297
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")           # 7297
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")           # 7297
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")                 # 7297
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
#7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[6001:7297] +rGF_filtered<-rGF_filtered[6001:7297] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T16_1_gblup_variances_all=rep(list(list()),cycles) +T16_1_gblup_prediction_all=rep(list(list()),cycles) +T16_1_gfblup_variances_all=rep(list(list()),cycles) +T16_1_gfblup_prediction_all=rep(list(list()),cycles) +T16_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T16_1...") + y=pheno_df_pla$T16_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T16_1_gblup_variances_all[[r]]<-var + 
T16_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T16_1_gblup_variances_all[[r]]<-list() + T16_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T16_1_gfblup_variances_all[[r]]<-var + T16_1_gfblup_prediction_all[[r]]<-pred + T16_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T16_1_gblup_variances_all,"T16_1_gblup_variances_all_6001_7297.rds") +saveRDS(T16_1_gblup_prediction_all,"T16_1_gblup_prediction_all_6001_7297.rds") +saveRDS(T16_1_gfblup_variances_all,"T16_1_gfblup_variances_all_6001_7297.rds") +saveRDS(T16_1_gfblup_prediction_all,"T16_1_gfblup_prediction_all_6001_7297.rds") +saveRDS(T16_1_gfblup_validate_all,"T16_1_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_2_1001_2000.R b/code/using_GO/pla/T16_2_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..e7d6af6ed30293ae5218769d28f8a121886c9eab --- /dev/null +++ b/code/using_GO/pla/T16_2_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- 
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") 
# ---- GO-term inputs ---------------------------------------------------------
# Author's size notes: 7432 = all GO terms; 7297 = GO terms left after
# excluding those with zero markers.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")     # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")               # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")         # 7297
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")       # 7297
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")           # 7297
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")           # 7297
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")                 # 7297
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")               # 7297

# ---- Filter -----------------------------------------------------------------
# Keep GO terms with at least one marker, select the matching entries of GF and
# rGF by name, then take this script's batch: positions 1001 to 2000.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
n <- length(go_all_genes_number[go_all_markers_number > 0])
GF_filtered <- GF[names(go_all_markers_filtered)][1001:2000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][1001:2000]

# ---- Cross-validation setup -------------------------------------------------
# Number of cross-validation cycles run by the loop further down.
cycles <- 10
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T16_2_gblup_variances_all=rep(list(list()),cycles) +T16_2_gblup_prediction_all=rep(list(list()),cycles) +T16_2_gfblup_variances_all=rep(list(list()),cycles) +T16_2_gfblup_prediction_all=rep(list(list()),cycles) +T16_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T16_2...") + y=pheno_df_pla$T16_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T16_2_gblup_variances_all[[r]]<-var + T16_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T16_2_gblup_variances_all[[r]]<-list() + T16_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T16_2_gfblup_variances_all[[r]]<-var + T16_2_gfblup_prediction_all[[r]]<-pred + T16_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T16_2_gblup_variances_all,"T16_2_gblup_variances_all_1001_2000.rds") +saveRDS(T16_2_gblup_prediction_all,"T16_2_gblup_prediction_all_1001_2000.rds") +saveRDS(T16_2_gfblup_variances_all,"T16_2_gfblup_variances_all_1001_2000.rds") +saveRDS(T16_2_gfblup_prediction_all,"T16_2_gfblup_prediction_all_1001_2000.rds") +saveRDS(T16_2_gfblup_validate_all,"T16_2_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_2_1_1000.R b/code/using_GO/pla/T16_2_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..b18897dd3c604a5570ab5f2d817fea5541df31c6 --- /dev/null +++ b/code/using_GO/pla/T16_2_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") 
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)

# ---- Data -------------------------------------------------------------------
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores the saved objects under their stored names; the models below
# rely on G and pheno_df_pla, presumably provided by these files -- TODO confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# Author's size notes: 7432 = all GO terms; 7297 = GO terms left after
# excluding those with zero markers.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")     # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")               # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")         # 7297
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")       # 7297
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")           # 7297
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")           # 7297
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")                 # 7297
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")               # 7297

# ---- Filter -----------------------------------------------------------------
# Keep GO terms that have at least one marker and select the matching entries
# of GF / rGF by name.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
# This script processes one batch only: positions 1 to 1000.
GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]

# ---- Cross-validation setup -------------------------------------------------
cycles <- 10
# Validation sets are read from the earlier GBLUP run instead of being sampled
# here, one entry per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8
# Fail before any model is fitted if a cycle has no stored validation sets.
stopifnot(length(gblup_validate) >= cycles)

# One slot per cycle; a slot stays an empty list when that cycle's fit fails.
T16_2_gblup_variances_all <- rep(list(list()), cycles)
T16_2_gblup_prediction_all <- rep(list(list()), cycles)
T16_2_gfblup_variances_all <- rep(list(list()), cycles)
T16_2_gfblup_prediction_all <- rep(list(list()), cycles)
T16_2_gfblup_validate_all <- rep(list(list()), cycles)

# The phenotype and design matrix do not depend on the cycle, so they are
# built once instead of on every iteration.
y <- pheno_df_pla$T16_2
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): if gblup_validate[[r]] wraps a fold matrix, unlist() yields
  # it column by column and byrow = TRUE regroups the indices into different
  # columns than the stored ones -- confirm this is intended.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T16_2...")

  # ---- GBLUP ----------------------------------------------------------------
  # Both fits are returned as a value and stored only when both succeed, so
  # the two result lists can never disagree about a cycle.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(y = y, X = X, GRM = list(G = G),
                         validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T16_2_gblup_variances_all[[r]] <- gblup_fit$variances
    T16_2_gblup_prediction_all[[r]] <- gblup_fit$prediction
  }

  # ---- GFBLUP ---------------------------------------------------------------
  gf_variances <- rep(list(list()), length(GF_filtered))
  gf_predictions <- rep(list(list()), length(GF_filtered))
  names(gf_variances) <- names(GF_filtered)
  names(gf_predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the k-th entry of GF_filtered and
    # the k-th entry of rGF_filtered.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # The handler returns the fallback instead of assigning it: an assignment
    # inside function(e) only changes a copy local to the handler. On any
    # failure both entries for this GO term are left empty.
    gf_fit <- tryCatch(
      list(
        variances = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        prediction = greml(y = y, X = X, GRM = grm_pair,
                           validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " (", names(GF_filtered)[k],
                ") in cycle ", r, ": ", conditionMessage(e))
        list(variances = list(), prediction = list())
      }
    )
    gf_variances[[k]] <- gf_fit$variances
    gf_predictions[[k]] <- gf_fit$prediction
  }
  T16_2_gfblup_variances_all[[r]] <- gf_variances
  T16_2_gfblup_prediction_all[[r]] <- gf_predictions
  T16_2_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T16_2_gblup_variances_all, "T16_2_gblup_variances_all_1_1000.rds")
saveRDS(T16_2_gblup_prediction_all, "T16_2_gblup_prediction_all_1_1000.rds")
saveRDS(T16_2_gfblup_variances_all, "T16_2_gfblup_variances_all_1_1000.rds")
saveRDS(T16_2_gfblup_prediction_all, "T16_2_gfblup_prediction_all_1_1000.rds")
saveRDS(T16_2_gfblup_validate_all, "T16_2_gfblup_validate_all_1_1000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T16_2_2001_3000.R b/code/using_GO/pla/T16_2_2001_3000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..69c45b4d1236bc050a0c1587e25d45f7f5f201d2
# --- /dev/null
# +++ b/code/using_GO/pla/T16_2_2001_3000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Result files below are written with relative names, i.e. into this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# presumably defines `G`, which is used below -- verify against the .Rdata file
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
# presumably defines `pheno_df_pla`, which is used below -- verify
load(file.path(data_dir, "pheno_df_pla.Rdata"))
# 7432
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
# 7297 excluding those go terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
########################################################################################
#filter
# Keep the sets that have at least one marker, then take this script's slice.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
set_names <- names(go_all_markers_filtered)
GF_filtered <- GF[set_names][2001:3000]
rGF_filtered <- rGF[set_names][2001:3000]

###########################################################################################
cycles <- 10
# Validation sets stored by the GBLUP run; entry r is reused in cycle r.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

T16_2_gblup_variances_all <- rep(list(list()), cycles)
T16_2_gblup_prediction_all <- rep(list(list()), cycles)
T16_2_gfblup_variances_all <- rep(list(list()), cycles)
T16_2_gfblup_prediction_all <- rep(list(list()), cycles)
T16_2_gfblup_validate_all <- rep(list(list()), cycles)

# Run greml() twice for one set of relationship matrices: first without and
# then with the validation sets.
# Returns list(var = <first fit>, pred = <second fit>). If either call errors,
# the failure is logged and BOTH elements are empty lists.
# The result is returned instead of being assigned inside the error handler:
# an assignment made in a tryCatch() handler only changes a copy local to the
# handler function and never reaches the calling scope.
fit_var_and_pred <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message(label, " failed: ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

# The response and design matrix do not change between cycles.
y <- pheno_df_pla$T16_2
# NOTE(review): the right-hand side `1*y` names the response itself; an
# intercept-only model (`y ~ 1`) was presumably intended -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): filling by row only reproduces the GBLUP folds if the stored
  # indices are laid out row by row -- confirm against the GBLUP script.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message ("T16_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gblup_fit <- fit_var_and_pred(y, X, list(G = G), validate, "GBLUP")
  T16_2_gblup_variances_all[[r]] <- gblup_fit$var
  T16_2_gblup_prediction_all[[r]] <- gblup_fit$pred
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    set_fit <- fit_var_and_pred(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      paste("GFBLUP GO#", k)
    )
    var[[k]] <- set_fit$var
    pred[[k]] <- set_fit$pred
  }
  T16_2_gfblup_variances_all[[r]] <- var
  T16_2_gfblup_prediction_all[[r]] <- pred
  T16_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T16_2_gblup_variances_all, "T16_2_gblup_variances_all_2001_3000.rds")
saveRDS(T16_2_gblup_prediction_all, "T16_2_gblup_prediction_all_2001_3000.rds")
saveRDS(T16_2_gfblup_variances_all, "T16_2_gfblup_variances_all_2001_3000.rds")
saveRDS(T16_2_gfblup_prediction_all, "T16_2_gfblup_prediction_all_2001_3000.rds")
saveRDS(T16_2_gfblup_validate_all, "T16_2_gfblup_validate_all_2001_3000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T16_2_3001_4000.R b/code/using_GO/pla/T16_2_3001_4000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..8c470465b772996b8ef9abbd91213d445870fc04
# --- /dev/null
# +++ b/code/using_GO/pla/T16_2_3001_4000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Result files are written with relative names, i.e. into this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# presumably defines `G`, which is used below -- verify against the .Rdata file
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
# presumably defines `pheno_df_pla`, which is used below -- verify
load(file.path(data_dir, "pheno_df_pla.Rdata"))
# 7432
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
# 7297 excluding those go terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
########################################################################################
#filter
# Keep the sets that have at least one marker, then take this script's slice.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
set_names <- names(go_all_markers_filtered)
GF_filtered <- GF[set_names][3001:4000]
rGF_filtered <- rGF[set_names][3001:4000]

###########################################################################################
cycles <- 10
# Validation sets stored by the GBLUP run; entry r is reused in cycle r.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

T16_2_gblup_variances_all <- rep(list(list()), cycles)
T16_2_gblup_prediction_all <- rep(list(list()), cycles)
T16_2_gfblup_variances_all <- rep(list(list()), cycles)
T16_2_gfblup_prediction_all <- rep(list(list()), cycles)
T16_2_gfblup_validate_all <- rep(list(list()), cycles)

# Run greml() twice for one set of relationship matrices: first without and
# then with the validation sets.
# Returns list(var = <first fit>, pred = <second fit>). If either call errors,
# the failure is logged and BOTH elements are empty lists.
# The result is returned instead of being assigned inside the error handler:
# an assignment made in a tryCatch() handler only changes a copy local to the
# handler function and never reaches the calling scope.
fit_var_and_pred <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message(label, " failed: ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

# The response and design matrix do not change between cycles.
y <- pheno_df_pla$T16_2
# NOTE(review): the right-hand side `1*y` names the response itself; an
# intercept-only model (`y ~ 1`) was presumably intended -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): filling by row only reproduces the GBLUP folds if the stored
  # indices are laid out row by row -- confirm against the GBLUP script.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message ("T16_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gblup_fit <- fit_var_and_pred(y, X, list(G = G), validate, "GBLUP")
  T16_2_gblup_variances_all[[r]] <- gblup_fit$var
  T16_2_gblup_prediction_all[[r]] <- gblup_fit$pred
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    set_fit <- fit_var_and_pred(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      paste("GFBLUP GO#", k)
    )
    var[[k]] <- set_fit$var
    pred[[k]] <- set_fit$pred
  }
  T16_2_gfblup_variances_all[[r]] <- var
  T16_2_gfblup_prediction_all[[r]] <- pred
  T16_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T16_2_gblup_variances_all, "T16_2_gblup_variances_all_3001_4000.rds")
saveRDS(T16_2_gblup_prediction_all, "T16_2_gblup_prediction_all_3001_4000.rds")
saveRDS(T16_2_gfblup_variances_all, "T16_2_gfblup_variances_all_3001_4000.rds")
saveRDS(T16_2_gfblup_prediction_all, "T16_2_gfblup_prediction_all_3001_4000.rds")
saveRDS(T16_2_gfblup_validate_all, "T16_2_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T16_2_4001_5000.R b/code/using_GO/pla/T16_2_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..78ed0a37db7b0d16823920794eb766c3f2dd5a6d
# --- /dev/null
# +++ b/code/using_GO/pla/T16_2_4001_5000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Result files below are written with relative names, i.e. into this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# presumably defines `G`, which is used below -- verify against the .Rdata file
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
# presumably defines `pheno_df_pla`, which is used below -- verify
load(file.path(data_dir, "pheno_df_pla.Rdata"))
# 7432
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
# 7297 excluding those go terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
########################################################################################
#filter
# Keep the sets that have at least one marker, then take this script's slice.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
set_names <- names(go_all_markers_filtered)
GF_filtered <- GF[set_names][4001:5000]
rGF_filtered <- rGF[set_names][4001:5000]

###########################################################################################
cycles <- 10
# Validation sets stored by the GBLUP run; entry r is reused in cycle r.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

T16_2_gblup_variances_all <- rep(list(list()), cycles)
T16_2_gblup_prediction_all <- rep(list(list()), cycles)
T16_2_gfblup_variances_all <- rep(list(list()), cycles)
T16_2_gfblup_prediction_all <- rep(list(list()), cycles)
T16_2_gfblup_validate_all <- rep(list(list()), cycles)

# Run greml() twice for one set of relationship matrices: first without and
# then with the validation sets.
# Returns list(var = <first fit>, pred = <second fit>). If either call errors,
# the failure is logged and BOTH elements are empty lists.
# The result is returned instead of being assigned inside the error handler:
# an assignment made in a tryCatch() handler only changes a copy local to the
# handler function and never reaches the calling scope.
fit_var_and_pred <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message(label, " failed: ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

# The response and design matrix do not change between cycles.
y <- pheno_df_pla$T16_2
# NOTE(review): the right-hand side `1*y` names the response itself; an
# intercept-only model (`y ~ 1`) was presumably intended -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): filling by row only reproduces the GBLUP folds if the stored
  # indices are laid out row by row -- confirm against the GBLUP script.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message ("T16_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gblup_fit <- fit_var_and_pred(y, X, list(G = G), validate, "GBLUP")
  T16_2_gblup_variances_all[[r]] <- gblup_fit$var
  T16_2_gblup_prediction_all[[r]] <- gblup_fit$pred
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    set_fit <- fit_var_and_pred(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      paste("GFBLUP GO#", k)
    )
    var[[k]] <- set_fit$var
    pred[[k]] <- set_fit$pred
  }
  T16_2_gfblup_variances_all[[r]] <- var
  T16_2_gfblup_prediction_all[[r]] <- pred
  T16_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T16_2_gblup_variances_all, "T16_2_gblup_variances_all_4001_5000.rds")
saveRDS(T16_2_gblup_prediction_all, "T16_2_gblup_prediction_all_4001_5000.rds")
saveRDS(T16_2_gfblup_variances_all, "T16_2_gfblup_variances_all_4001_5000.rds")
saveRDS(T16_2_gfblup_prediction_all, "T16_2_gfblup_prediction_all_4001_5000.rds")
saveRDS(T16_2_gfblup_validate_all, "T16_2_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T16_2_5001_6000.R b/code/using_GO/pla/T16_2_5001_6000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..ffb62ea13b2e604aff20a7aa297a53dbbfc2a301
# --- /dev/null
# +++ b/code/using_GO/pla/T16_2_5001_6000.R
# @@ -0,0 +1,131 @@
+############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + 
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T16_2_gblup_variances_all=rep(list(list()),cycles) +T16_2_gblup_prediction_all=rep(list(list()),cycles) +T16_2_gfblup_variances_all=rep(list(list()),cycles) +T16_2_gfblup_prediction_all=rep(list(list()),cycles) +T16_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T16_2...") + y=pheno_df_pla$T16_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T16_2_gblup_variances_all[[r]]<-var + T16_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T16_2_gblup_variances_all[[r]]<-list() + T16_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T16_2_gfblup_variances_all[[r]]<-var + T16_2_gfblup_prediction_all[[r]]<-pred + T16_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T16_2_gblup_variances_all,"T16_2_gblup_variances_all_5001_6000.rds") +saveRDS(T16_2_gblup_prediction_all,"T16_2_gblup_prediction_all_5001_6000.rds") +saveRDS(T16_2_gfblup_variances_all,"T16_2_gfblup_variances_all_5001_6000.rds") +saveRDS(T16_2_gfblup_prediction_all,"T16_2_gfblup_prediction_all_5001_6000.rds") +saveRDS(T16_2_gfblup_validate_all,"T16_2_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_2_6001_7297.R b/code/using_GO/pla/T16_2_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..20f381b1954755ab7866cf88914949175acb0ec3 --- /dev/null +++ b/code/using_GO/pla/T16_2_6001_7297.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# 
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
#7297 excluding those go terms with zero markers
# (the count above annotates the rsetsGF load on the preceding line)
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
# filter: keep the GO terms whose marker count is > 0, then take this script's
# slice (positions 6001..7297) of the GF / rGF lists
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[6001:7297]
rGF_filtered <- rGF_filtered[6001:7297]

###########################################################################################
cycles <- 10
# Validation sets are read from a stored file, one entry per cycle
# (presumably the folds used by the plain GBLUP run -- confirm against that script).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T16_2_gblup_variances_all <- rep(list(list()), cycles)
T16_2_gblup_prediction_all <- rep(list(list()), cycles)
T16_2_gfblup_variances_all <- rep(list(list()), cycles)
T16_2_gfblup_prediction_all <- rep(list(list()), cycles)
T16_2_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The former time-seeded sample() of fold indices was never consumed (its
  # only use was commented out), so it is dropped; the stored sets are used.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     nrow = round(nrow(pheno_df_pla) / n_folds),
                     byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T16_2...")
  y <- pheno_df_pla$T16_2
  # NOTE(review): the response also appears on the right-hand side of this
  # formula; kept exactly as written -- confirm an intercept-only design is meant.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed before either result is stored (as before).
  # The handler returns NULL instead of assigning: assignments made inside a
  # handler function only change a local copy and never reach these lists.
  gblup_fit <- tryCatch({
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    )
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    NULL
  })
  if (!is.null(gblup_fit)) {
    T16_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T16_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  # seq_along() so an empty slice runs zero iterations (1:0 would run two).
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # Store the pair only when both fits succeed, so a failed prediction fit
    # cannot leave a variance fit behind for the same GO term.
    fit_k <- tryCatch({
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      )
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
      NULL
    })
    if (!is.null(fit_k)) {
      gfblup_var[[k]] <- fit_k[["var"]]
      gfblup_pred[[k]] <- fit_k[["pred"]]
    }
  }
  T16_2_gfblup_variances_all[[r]] <- gfblup_var
  T16_2_gfblup_prediction_all[[r]] <- gfblup_pred
  T16_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T16_2_gblup_variances_all, "T16_2_gblup_variances_all_6001_7297.rds")
saveRDS(T16_2_gblup_prediction_all, "T16_2_gblup_prediction_all_6001_7297.rds")
saveRDS(T16_2_gfblup_variances_all, "T16_2_gfblup_variances_all_6001_7297.rds")
saveRDS(T16_2_gfblup_prediction_all, "T16_2_gfblup_prediction_all_6001_7297.rds")
+saveRDS(T16_2_gfblup_validate_all,"T16_2_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_3_1001_2000.R b/code/using_GO/pla/T16_3_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..0c2b84f57fa0ad27f05af53c95aece3cee97fe1f --- /dev/null +++ b/code/using_GO/pla/T16_3_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# Inputs for the run. readRDS() results are bound to the names on the left;
# load() restores objects under whatever names they were saved with.
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term inputs; the trailing numbers are the author's recorded element counts.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
+######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T16_3_gblup_variances_all=rep(list(list()),cycles) +T16_3_gblup_prediction_all=rep(list(list()),cycles) +T16_3_gfblup_variances_all=rep(list(list()),cycles) +T16_3_gfblup_prediction_all=rep(list(list()),cycles) +T16_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T16_3...") + y=pheno_df_pla$T16_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T16_3_gblup_variances_all[[r]]<-var + T16_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
T16_3_gblup_variances_all[[r]]<-list() + T16_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T16_3_gfblup_variances_all[[r]]<-var + T16_3_gfblup_prediction_all[[r]]<-pred + T16_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T16_3_gblup_variances_all,"T16_3_gblup_variances_all_1001_2000.rds") +saveRDS(T16_3_gblup_prediction_all,"T16_3_gblup_prediction_all_1001_2000.rds") +saveRDS(T16_3_gfblup_variances_all,"T16_3_gfblup_variances_all_1001_2000.rds") +saveRDS(T16_3_gfblup_prediction_all,"T16_3_gfblup_prediction_all_1001_2000.rds") +saveRDS(T16_3_gfblup_validate_all,"T16_3_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_3_1_1000.R b/code/using_GO/pla/T16_3_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..3eef118f4ace058f0ac4acb436120a680ec570e2 --- /dev/null +++ b/code/using_GO/pla/T16_3_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# 
Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
# GO-term inputs; the trailing numbers are the author's recorded element counts.
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
########################################################################################
# filter: keep the GO terms whose marker count is > 0, look their names up in
# GF / rGF, and take this script's slice (positions 1..1000)
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][seq_len(1000)]
rGF_filtered <- rGF[names(go_all_markers_filtered)][seq_len(1000)]

###########################################################################################
# Run set-up: number of cycles, the stored validation sets, and the fold count.
cycles <- 10
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# First of the per-cycle result containers: one empty list per cycle.
T16_3_gblup_variances_all <- rep(list(list()), cycles)
+T16_3_gblup_prediction_all=rep(list(list()),cycles) +T16_3_gfblup_variances_all=rep(list(list()),cycles) +T16_3_gfblup_prediction_all=rep(list(list()),cycles) +T16_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T16_3...") + y=pheno_df_pla$T16_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T16_3_gblup_variances_all[[r]]<-var + T16_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T16_3_gblup_variances_all[[r]]<-list() + T16_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends 
+ } + T16_3_gfblup_variances_all[[r]]<-var + T16_3_gfblup_prediction_all[[r]]<-pred + T16_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T16_3_gblup_variances_all,"T16_3_gblup_variances_all_1_1000.rds") +saveRDS(T16_3_gblup_prediction_all,"T16_3_gblup_prediction_all_1_1000.rds") +saveRDS(T16_3_gfblup_variances_all,"T16_3_gfblup_variances_all_1_1000.rds") +saveRDS(T16_3_gfblup_prediction_all,"T16_3_gfblup_prediction_all_1_1000.rds") +saveRDS(T16_3_gfblup_validate_all,"T16_3_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_3_2001_3000.R b/code/using_GO/pla/T16_3_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..d84db561acfe4ae65340758f3b3fde85b2ff7c00 --- /dev/null +++ b/code/using_GO/pla/T16_3_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') 
###########################################################################################
# Inputs for the run. readRDS() results are bound to the names on the left;
# load() restores objects under whatever names they were saved with.
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term inputs; the trailing numbers are the author's recorded element counts.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T16_3_gblup_variances_all=rep(list(list()),cycles) +T16_3_gblup_prediction_all=rep(list(list()),cycles) +T16_3_gfblup_variances_all=rep(list(list()),cycles) +T16_3_gfblup_prediction_all=rep(list(list()),cycles) +T16_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T16_3...") + y=pheno_df_pla$T16_3 + fm <- y ~ 1*y + 
X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T16_3_gblup_variances_all[[r]]<-var + T16_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T16_3_gblup_variances_all[[r]]<-list() + T16_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T16_3_gfblup_variances_all[[r]]<-var + T16_3_gfblup_prediction_all[[r]]<-pred + T16_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T16_3_gblup_variances_all,"T16_3_gblup_variances_all_2001_3000.rds") +saveRDS(T16_3_gblup_prediction_all,"T16_3_gblup_prediction_all_2001_3000.rds") +saveRDS(T16_3_gfblup_variances_all,"T16_3_gfblup_variances_all_2001_3000.rds") +saveRDS(T16_3_gfblup_prediction_all,"T16_3_gfblup_prediction_all_2001_3000.rds") +saveRDS(T16_3_gfblup_validate_all,"T16_3_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_3_3001_4000.R 
b/code/using_GO/pla/T16_3_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..6164a25cca47cf3ebc7cc9fb0fdc893c22c47e36 --- /dev/null +++ b/code/using_GO/pla/T16_3_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
# load() restores the pla phenotype objects under the names they were saved with.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term inputs; the trailing numbers are the author's recorded element counts.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
########################################################################################
# filter: keep the GO terms whose marker count is > 0, look their names up in
# GF / rGF, and take this script's slice (positions 3001..4000)
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][3001:4000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][3001:4000]

###########################################################################################
# Repeated cross-validation of GBLUP and GFBLUP models for trait T16_3.
cycles <- 10
# Validation folds are read from the GBLUP run rather than re-sampled here
# (presumably so both analyses are scored on the same folds -- confirm).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when that cycle's fit fails.
T16_3_gblup_variances_all <- rep(list(list()), cycles)
T16_3_gblup_prediction_all <- rep(list(list()), cycles)
T16_3_gfblup_variances_all <- rep(list(list()), cycles)
T16_3_gfblup_prediction_all <- rep(list(list()), cycles)
T16_3_gfblup_validate_all <- rep(list(list()), cycles)

# Run greml() twice for one GRM list: without `validate` (stored as the
# "variances" result) and with `validate` (stored as the "prediction" result).
# If either call errors, the failure is reported and empty lists are returned
# for BOTH results. The previous handlers assigned inside the handler function,
# which only changed local copies, so a failure was silent and could leave a
# variance fit without its matching prediction.
#   y, X     : response vector and fixed-effect design matrix
#   GRM      : list passed to greml() as GRM
#   validate : matrix passed to greml() as validate
#   label    : text identifying the fit in the warning
# Returns list(var = <fit>, pred = <fit>).
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      warning(label, ": greml failed: ", conditionMessage(e),
              call. = FALSE, immediate. = TRUE)
      list(var = list(), pred = list())
    }
  )
}

# Response and design matrix do not change between cycles, so build them once.
y <- pheno_df_pla$T16_3
# NOTE(review): `1*y` puts the response on the right-hand side of the formula;
# an intercept-only design (`y ~ 1`) was presumably intended -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Reshape this cycle's stored folds into round(n / n_folds) rows.
  # NOTE(review): if gblup_validate[[r]] holds a column-per-fold matrix, unlist()
  # flattens it column-major and byrow = TRUE refills it row-major, which would
  # regroup the folds -- confirm against the script that wrote the file.
  # The unused time-seeded fold sampling that used to sit here was removed; the
  # folds always come from gblup_validate.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)

  message("T16_3...")
  # ---- GBLUP: GRM = list(G = G) ----
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate, paste0("GBLUP, cycle ", r))
  T16_3_gblup_variances_all[[r]] <- gblup_fit$var
  T16_3_gblup_prediction_all[[r]] <- gblup_fit$pred

  # ---- GFBLUP: one model per entry of GF_filtered, paired with the entry of
  # rGF_filtered at the same position ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
                             paste0("GFBLUP, cycle ", r, ", set ", k))
    var[[k]] <- gf_fit$var
    pred[[k]] <- gf_fit$pred
  }
  T16_3_gfblup_variances_all[[r]] <- var
  T16_3_gfblup_prediction_all[[r]] <- pred
  T16_3_gfblup_validate_all[[r]] <- list(validate)
}
# File names are relative, so they resolve against the working directory.
saveRDS(T16_3_gblup_variances_all, "T16_3_gblup_variances_all_3001_4000.rds")
saveRDS(T16_3_gblup_prediction_all, "T16_3_gblup_prediction_all_3001_4000.rds")
saveRDS(T16_3_gfblup_variances_all, "T16_3_gfblup_variances_all_3001_4000.rds")
saveRDS(T16_3_gfblup_prediction_all, "T16_3_gfblup_prediction_all_3001_4000.rds")
saveRDS(T16_3_gfblup_validate_all, "T16_3_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T16_3_4001_5000.R b/code/using_GO/pla/T16_3_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..95234f4916fd78c9c8ebc9232e4e7c30091343d9
# --- /dev/null
# +++ b/code/using_GO/pla/T16_3_4001_5000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated form of %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Relative output paths used by the saveRDS() calls resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under their saved names; G and pheno_df_pla are used
# further down and are presumably defined by these .Rdata files -- confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Keep only entries whose marker count is positive, then take this script's
# slice (positions 4001 to 5000) of the filtered lists.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]

###########################################################################################
# Repeated cross-validation of GBLUP and GFBLUP models for trait T16_3.
cycles <- 10
# Validation folds are read from the GBLUP run rather than re-sampled here
# (presumably so both analyses are scored on the same folds -- confirm).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when that cycle's fit fails.
T16_3_gblup_variances_all <- rep(list(list()), cycles)
T16_3_gblup_prediction_all <- rep(list(list()), cycles)
T16_3_gfblup_variances_all <- rep(list(list()), cycles)
T16_3_gfblup_prediction_all <- rep(list(list()), cycles)
T16_3_gfblup_validate_all <- rep(list(list()), cycles)

# Run greml() twice for one GRM list: without `validate` (stored as the
# "variances" result) and with `validate` (stored as the "prediction" result).
# If either call errors, the failure is reported and empty lists are returned
# for BOTH results. The previous handlers assigned inside the handler function,
# which only changed local copies, so a failure was silent and could leave a
# variance fit without its matching prediction.
#   y, X     : response vector and fixed-effect design matrix
#   GRM      : list passed to greml() as GRM
#   validate : matrix passed to greml() as validate
#   label    : text identifying the fit in the warning
# Returns list(var = <fit>, pred = <fit>).
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      warning(label, ": greml failed: ", conditionMessage(e),
              call. = FALSE, immediate. = TRUE)
      list(var = list(), pred = list())
    }
  )
}

# Response and design matrix do not change between cycles, so build them once.
y <- pheno_df_pla$T16_3
# NOTE(review): `1*y` puts the response on the right-hand side of the formula;
# an intercept-only design (`y ~ 1`) was presumably intended -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Reshape this cycle's stored folds into round(n / n_folds) rows.
  # NOTE(review): if gblup_validate[[r]] holds a column-per-fold matrix, unlist()
  # flattens it column-major and byrow = TRUE refills it row-major, which would
  # regroup the folds -- confirm against the script that wrote the file.
  # The unused time-seeded fold sampling that used to sit here was removed; the
  # folds always come from gblup_validate.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)

  message("T16_3...")
  # ---- GBLUP: GRM = list(G = G) ----
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate, paste0("GBLUP, cycle ", r))
  T16_3_gblup_variances_all[[r]] <- gblup_fit$var
  T16_3_gblup_prediction_all[[r]] <- gblup_fit$pred

  # ---- GFBLUP: one model per entry of GF_filtered, paired with the entry of
  # rGF_filtered at the same position ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
                             paste0("GFBLUP, cycle ", r, ", set ", k))
    var[[k]] <- gf_fit$var
    pred[[k]] <- gf_fit$pred
  }
  T16_3_gfblup_variances_all[[r]] <- var
  T16_3_gfblup_prediction_all[[r]] <- pred
  T16_3_gfblup_validate_all[[r]] <- list(validate)
}
# File names are relative, so they resolve against the working directory.
saveRDS(T16_3_gblup_variances_all, "T16_3_gblup_variances_all_4001_5000.rds")
saveRDS(T16_3_gblup_prediction_all, "T16_3_gblup_prediction_all_4001_5000.rds")
# Remaining GFBLUP outputs of the 4001-5000 run (relative paths, written to the
# working directory).
saveRDS(object = T16_3_gfblup_variances_all, file = "T16_3_gfblup_variances_all_4001_5000.rds")
saveRDS(object = T16_3_gfblup_prediction_all, file = "T16_3_gfblup_prediction_all_4001_5000.rds")
saveRDS(object = T16_3_gfblup_validate_all, file = "T16_3_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T16_3_5001_6000.R b/code/using_GO/pla/T16_3_5001_6000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..13231fcd8e03c2e0376ab9d94eba213766943155
# --- /dev/null
# +++ b/code/using_GO/pla/T16_3_5001_6000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Remove whitespace at both ends of each string in x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Complement of %in%.
`%not_in%` <- purrr::negate(`%in%`)
# TRUE when `mypkg` appears in the first column of installed.packages().
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

# Working directory for this run; relative file names resolve against it.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under their saved names; G and pheno_df_pla are used
# further down and are presumably defined by these .Rdata files -- confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Keep only entries whose marker count is positive, then take this script's
# slice (positions 5001 to 6000) of the filtered lists.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]

###########################################################################################
# Repeated cross-validation of GBLUP and GFBLUP models for trait T16_3.
cycles <- 10
# Validation folds are read from the GBLUP run rather than re-sampled here
# (presumably so both analyses are scored on the same folds -- confirm).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when that cycle's fit fails.
T16_3_gblup_variances_all <- rep(list(list()), cycles)
T16_3_gblup_prediction_all <- rep(list(list()), cycles)
T16_3_gfblup_variances_all <- rep(list(list()), cycles)
T16_3_gfblup_prediction_all <- rep(list(list()), cycles)
T16_3_gfblup_validate_all <- rep(list(list()), cycles)

# Run greml() twice for one GRM list: without `validate` (stored as the
# "variances" result) and with `validate` (stored as the "prediction" result).
# If either call errors, the failure is reported and empty lists are returned
# for BOTH results. The previous handlers assigned inside the handler function,
# which only changed local copies, so a failure was silent and could leave a
# variance fit without its matching prediction.
#   y, X     : response vector and fixed-effect design matrix
#   GRM      : list passed to greml() as GRM
#   validate : matrix passed to greml() as validate
#   label    : text identifying the fit in the warning
# Returns list(var = <fit>, pred = <fit>).
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      warning(label, ": greml failed: ", conditionMessage(e),
              call. = FALSE, immediate. = TRUE)
      list(var = list(), pred = list())
    }
  )
}

# Response and design matrix do not change between cycles, so build them once.
y <- pheno_df_pla$T16_3
# NOTE(review): `1*y` puts the response on the right-hand side of the formula;
# an intercept-only design (`y ~ 1`) was presumably intended -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Reshape this cycle's stored folds into round(n / n_folds) rows.
  # NOTE(review): if gblup_validate[[r]] holds a column-per-fold matrix, unlist()
  # flattens it column-major and byrow = TRUE refills it row-major, which would
  # regroup the folds -- confirm against the script that wrote the file.
  # The unused time-seeded fold sampling that used to sit here was removed; the
  # folds always come from gblup_validate.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)

  message("T16_3...")
  # ---- GBLUP: GRM = list(G = G) ----
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate, paste0("GBLUP, cycle ", r))
  T16_3_gblup_variances_all[[r]] <- gblup_fit$var
  T16_3_gblup_prediction_all[[r]] <- gblup_fit$pred

  # ---- GFBLUP: one model per entry of GF_filtered, paired with the entry of
  # rGF_filtered at the same position ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
                             paste0("GFBLUP, cycle ", r, ", set ", k))
    var[[k]] <- gf_fit$var
    pred[[k]] <- gf_fit$pred
  }
  T16_3_gfblup_variances_all[[r]] <- var
  T16_3_gfblup_prediction_all[[r]] <- pred
  T16_3_gfblup_validate_all[[r]] <- list(validate)
}
# File names are relative, so they resolve against the working directory.
saveRDS(T16_3_gblup_variances_all, "T16_3_gblup_variances_all_5001_6000.rds")
saveRDS(T16_3_gblup_prediction_all, "T16_3_gblup_prediction_all_5001_6000.rds")
saveRDS(T16_3_gfblup_variances_all, "T16_3_gfblup_variances_all_5001_6000.rds")
saveRDS(T16_3_gfblup_prediction_all, "T16_3_gfblup_prediction_all_5001_6000.rds")
saveRDS(T16_3_gfblup_validate_all, "T16_3_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T16_3_6001_7297.R b/code/using_GO/pla/T16_3_6001_7297.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..9df06b48e4ea4ee271211d93d5143ca274ef0204
# --- /dev/null
# +++ b/code/using_GO/pla/T16_3_6001_7297.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated form of %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Relative output paths used by the saveRDS() calls resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term inputs. The trailing numbers are the authors' own notes on the files.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Entries with a positive marker count are kept; this script then works on
# positions 6001 to 7297 of the filtered lists.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
kept_names <- names(go_all_markers_filtered)
GF_filtered <- GF[kept_names][6001:7297]
rGF_filtered <- rGF[kept_names][6001:7297]

###########################################################################################
# Number of cross-validation cycles.
cycles <- 10 #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
# Repeated cross-validation of GBLUP and GFBLUP models for trait T16_3.
# `cycles` is set by the statement immediately before this section.
# Validation folds are read from the GBLUP run rather than re-sampled here
# (presumably so both analyses are scored on the same folds -- confirm).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when that cycle's fit fails.
T16_3_gblup_variances_all <- rep(list(list()), cycles)
T16_3_gblup_prediction_all <- rep(list(list()), cycles)
T16_3_gfblup_variances_all <- rep(list(list()), cycles)
T16_3_gfblup_prediction_all <- rep(list(list()), cycles)
T16_3_gfblup_validate_all <- rep(list(list()), cycles)

# Run greml() twice for one GRM list: without `validate` (stored as the
# "variances" result) and with `validate` (stored as the "prediction" result).
# If either call errors, the failure is reported and empty lists are returned
# for BOTH results. The previous handlers assigned inside the handler function,
# which only changed local copies, so a failure was silent and could leave a
# variance fit without its matching prediction.
#   y, X     : response vector and fixed-effect design matrix
#   GRM      : list passed to greml() as GRM
#   validate : matrix passed to greml() as validate
#   label    : text identifying the fit in the warning
# Returns list(var = <fit>, pred = <fit>).
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      warning(label, ": greml failed: ", conditionMessage(e),
              call. = FALSE, immediate. = TRUE)
      list(var = list(), pred = list())
    }
  )
}

# Response and design matrix do not change between cycles, so build them once.
y <- pheno_df_pla$T16_3
# NOTE(review): `1*y` puts the response on the right-hand side of the formula;
# an intercept-only design (`y ~ 1`) was presumably intended -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Reshape this cycle's stored folds into round(n / n_folds) rows.
  # NOTE(review): if gblup_validate[[r]] holds a column-per-fold matrix, unlist()
  # flattens it column-major and byrow = TRUE refills it row-major, which would
  # regroup the folds -- confirm against the script that wrote the file.
  # The unused time-seeded fold sampling that used to sit here was removed; the
  # folds always come from gblup_validate.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)

  message("T16_3...")
  # ---- GBLUP: GRM = list(G = G) ----
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate, paste0("GBLUP, cycle ", r))
  T16_3_gblup_variances_all[[r]] <- gblup_fit$var
  T16_3_gblup_prediction_all[[r]] <- gblup_fit$pred

  # ---- GFBLUP: one model per entry of GF_filtered, paired with the entry of
  # rGF_filtered at the same position ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
                             paste0("GFBLUP, cycle ", r, ", set ", k))
    var[[k]] <- gf_fit$var
    pred[[k]] <- gf_fit$pred
  }
  T16_3_gfblup_variances_all[[r]] <- var
  T16_3_gfblup_prediction_all[[r]] <- pred
  T16_3_gfblup_validate_all[[r]] <- list(validate)
}
# File names are relative, so they resolve against the working directory.
saveRDS(T16_3_gblup_variances_all, "T16_3_gblup_variances_all_6001_7297.rds")
saveRDS(T16_3_gblup_prediction_all, "T16_3_gblup_prediction_all_6001_7297.rds")
saveRDS(T16_3_gfblup_variances_all, "T16_3_gfblup_variances_all_6001_7297.rds")
saveRDS(T16_3_gfblup_prediction_all, "T16_3_gfblup_prediction_all_6001_7297.rds")
saveRDS(T16_3_gfblup_validate_all, "T16_3_gfblup_validate_all_6001_7297.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T16_4_1001_2000.R b/code/using_GO/pla/T16_4_1001_2000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..e0d44efe163619c9040c0bc9a1f8e250798538b4
# --- /dev/null
# +++ b/code/using_GO/pla/T16_4_1001_2000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated form of %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Relative output paths used by the saveRDS() calls resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under their saved names; G and pheno_df_pla are used
# further down and are presumably defined by these .Rdata files -- confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Keep only entries whose marker count is positive, then take this script's
# slice (positions 1001 to 2000) of the filtered lists.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1001:2000]
rGF_filtered <- rGF_filtered[1001:2000]

###########################################################################################
# Repeated cross-validation of GBLUP and GFBLUP models for trait T16_4.
cycles <- 10
# Validation folds are read from the GBLUP run rather than re-sampled here
# (presumably so both analyses are scored on the same folds -- confirm).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when that cycle's fit fails.
T16_4_gblup_variances_all <- rep(list(list()), cycles)
T16_4_gblup_prediction_all <- rep(list(list()), cycles)
T16_4_gfblup_variances_all <- rep(list(list()), cycles)
T16_4_gfblup_prediction_all <- rep(list(list()), cycles)
T16_4_gfblup_validate_all <- rep(list(list()), cycles)

# Same paired-fit helper as in the preceding script; redefined here because
# each script in this file is run on its own.
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      # Report the failure and hand back empty results for both fits.
      warning(label, ": greml failed: ", conditionMessage(e),
              call. = FALSE, immediate. = TRUE)
      list(var = list(), pred = list())
    }
  )
}

# Response and design matrix do not change between cycles, so build them once.
y <- pheno_df_pla$T16_4
# NOTE(review): `1*y` puts the response on the right-hand side of the formula;
# an intercept-only design (`y ~ 1`) was presumably intended -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Reshape this cycle's stored folds into round(n / n_folds) rows.
  # NOTE(review): if gblup_validate[[r]] holds a column-per-fold matrix, unlist()
  # flattens it column-major and byrow = TRUE refills it row-major, which would
  # regroup the folds -- confirm against the script that wrote the file.
  # The unused time-seeded fold sampling that used to sit here was removed; the
  # folds always come from gblup_validate.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)

  message("T16_4...")
  # ---- GBLUP: GRM = list(G = G) ----
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate, paste0("GBLUP, cycle ", r))
  T16_4_gblup_variances_all[[r]] <- gblup_fit$var
  T16_4_gblup_prediction_all[[r]] <- gblup_fit$pred

  # ---- GFBLUP: one model per entry of GF_filtered, paired with the entry of
  # rGF_filtered at the same position ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
                             paste0("GFBLUP, cycle ", r, ", set ", k))
    var[[k]] <- gf_fit$var
    pred[[k]] <- gf_fit$pred
  }
  T16_4_gfblup_variances_all[[r]] <- var
  T16_4_gfblup_prediction_all[[r]] <- pred
  T16_4_gfblup_validate_all[[r]] <- list(validate)
}
# File names are relative, so they resolve against the working directory.
saveRDS(T16_4_gblup_variances_all, "T16_4_gblup_variances_all_1001_2000.rds")
saveRDS(T16_4_gblup_prediction_all, "T16_4_gblup_prediction_all_1001_2000.rds")
saveRDS(T16_4_gfblup_variances_all, "T16_4_gfblup_variances_all_1001_2000.rds")
saveRDS(T16_4_gfblup_prediction_all, "T16_4_gfblup_prediction_all_1001_2000.rds")
# Last output of the 1001-2000 run: the validation matrices used in each cycle.
saveRDS(object = T16_4_gfblup_validate_all, file = "T16_4_gfblup_validate_all_1001_2000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T16_4_1_1000.R b/code/using_GO/pla/T16_4_1_1000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..5cd0a6a0c0dbeb55b0d1aec3c37cd2f7b5ca2ce1
# --- /dev/null
# +++ b/code/using_GO/pla/T16_4_1_1000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Remove whitespace at both ends of each string in x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Complement of %in%.
`%not_in%` <- purrr::negate(`%in%`)
# TRUE when `mypkg` appears in the first column of installed.packages().
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

# Working directory for this run; relative file names resolve against it.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
# Remaining inputs from the shared priors/data directory. The load() calls
# restore objects under whatever names they were saved with.
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term inputs. The trailing numbers are the authors' own notes on the files.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Count of entries whose marker count is positive.
n <- length(go_all_genes_number[go_all_markers_number > 0])
# Keep only entries whose marker count is positive, then take this script's
# slice (positions 1 to 1000) of the filtered lists.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]

###########################################################################################
# Repeated cross-validation of GBLUP and GFBLUP models for trait T16_4.
cycles <- 10
# Validation folds are read from the GBLUP run rather than re-sampled here
# (presumably so both analyses are scored on the same folds -- confirm).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when that cycle's fit fails.
T16_4_gblup_variances_all <- rep(list(list()), cycles)
T16_4_gblup_prediction_all <- rep(list(list()), cycles)
T16_4_gfblup_variances_all <- rep(list(list()), cycles)
T16_4_gfblup_prediction_all <- rep(list(list()), cycles)
T16_4_gfblup_validate_all <- rep(list(list()), cycles)

# Run greml() twice for one GRM list: without `validate` (stored as the
# "variances" result) and with `validate` (stored as the "prediction" result).
# If either call errors, the failure is reported and empty lists are returned
# for BOTH results. The previous handlers assigned inside the handler function,
# which only changed local copies, so a failure was silent and could leave a
# variance fit without its matching prediction.
#   y, X     : response vector and fixed-effect design matrix
#   GRM      : list passed to greml() as GRM
#   validate : matrix passed to greml() as validate
#   label    : text identifying the fit in the warning
# Returns list(var = <fit>, pred = <fit>).
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      warning(label, ": greml failed: ", conditionMessage(e),
              call. = FALSE, immediate. = TRUE)
      list(var = list(), pred = list())
    }
  )
}

# Response and design matrix do not change between cycles, so build them once.
y <- pheno_df_pla$T16_4
# NOTE(review): `1*y` puts the response on the right-hand side of the formula;
# an intercept-only design (`y ~ 1`) was presumably intended -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Reshape this cycle's stored folds into round(n / n_folds) rows.
  # NOTE(review): if gblup_validate[[r]] holds a column-per-fold matrix, unlist()
  # flattens it column-major and byrow = TRUE refills it row-major, which would
  # regroup the folds -- confirm against the script that wrote the file.
  # The unused time-seeded fold sampling that used to sit here was removed; the
  # folds always come from gblup_validate.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)

  message("T16_4...")
  # ---- GBLUP: GRM = list(G = G) ----
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate, paste0("GBLUP, cycle ", r))
  T16_4_gblup_variances_all[[r]] <- gblup_fit$var
  T16_4_gblup_prediction_all[[r]] <- gblup_fit$pred

  # ---- GFBLUP: one model per entry of GF_filtered, paired with the entry of
  # rGF_filtered at the same position ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
                             paste0("GFBLUP, cycle ", r, ", set ", k))
    var[[k]] <- gf_fit$var
    pred[[k]] <- gf_fit$pred
  }
  T16_4_gfblup_variances_all[[r]] <- var
  T16_4_gfblup_prediction_all[[r]] <- pred
  T16_4_gfblup_validate_all[[r]] <- list(validate)
}
# File names are relative, so they resolve against the working directory.
saveRDS(T16_4_gblup_variances_all, "T16_4_gblup_variances_all_1_1000.rds")
saveRDS(T16_4_gblup_prediction_all, "T16_4_gblup_prediction_all_1_1000.rds")
saveRDS(T16_4_gfblup_variances_all, "T16_4_gfblup_variances_all_1_1000.rds")
saveRDS(T16_4_gfblup_prediction_all, "T16_4_gfblup_prediction_all_1_1000.rds")
saveRDS(T16_4_gfblup_validate_all, "T16_4_gfblup_validate_all_1_1000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T16_4_2001_3000.R b/code/using_GO/pla/T16_4_2001_3000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..0f6358c70c4eb6a25ed87df2da3942e88f4bbcd8
# --- /dev/null
# +++ b/code/using_GO/pla/T16_4_2001_3000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated form of %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Relative paths used later in the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
# ---- GO-term inputs ----------------------------------------------------------
# Size notes from the original comments: go_all_markers was annotated "7432";
# every other object here "7297 excluding those go terms with zero markers".
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter -------------------------------------------------------------------
# Number of entries whose marker count is positive.
n <- length(go_all_genes_number[go_all_markers_number > 0])
# Keep the entries with a positive marker count, look the same names up in
# both relationship-matrix lists, then take this script's slice (2001..3000).
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][2001:3000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][2001:3000]

# ---- Cross-validation setup ----------------------------------------------------
cycles <- 10
# Validation sets saved by the plain GBLUP run; indexed by cycle further down.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One empty-list placeholder per cycle for each result collection.
T16_4_gblup_variances_all <- lapply(seq_len(cycles), function(i) list())
T16_4_gblup_prediction_all <- lapply(seq_len(cycles), function(i) list())
T16_4_gfblup_variances_all <- lapply(seq_len(cycles), function(i) list())
+T16_4_gfblup_prediction_all=rep(list(list()),cycles) +T16_4_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T16_4...") + y=pheno_df_pla$T16_4 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T16_4_gblup_variances_all[[r]]<-var + T16_4_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T16_4_gblup_variances_all[[r]]<-list() + T16_4_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T16_4_gfblup_variances_all[[r]]<-var + T16_4_gfblup_prediction_all[[r]]<-pred + 
T16_4_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T16_4_gblup_variances_all,"T16_4_gblup_variances_all_2001_3000.rds") +saveRDS(T16_4_gblup_prediction_all,"T16_4_gblup_prediction_all_2001_3000.rds") +saveRDS(T16_4_gfblup_variances_all,"T16_4_gfblup_variances_all_2001_3000.rds") +saveRDS(T16_4_gfblup_prediction_all,"T16_4_gfblup_prediction_all_2001_3000.rds") +saveRDS(T16_4_gfblup_validate_all,"T16_4_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_4_3001_4000.R b/code/using_GO/pla/T16_4_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..ed80345abf6e34ef09f036615d3bb7876914928e --- /dev/null +++ b/code/using_GO/pla/T16_4_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") 
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero 
markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T16_4_gblup_variances_all=rep(list(list()),cycles) +T16_4_gblup_prediction_all=rep(list(list()),cycles) +T16_4_gfblup_variances_all=rep(list(list()),cycles) +T16_4_gfblup_prediction_all=rep(list(list()),cycles) +T16_4_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T16_4...") + y=pheno_df_pla$T16_4 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T16_4_gblup_variances_all[[r]]<-var + T16_4_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T16_4_gblup_variances_all[[r]]<-list() + T16_4_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T16_4_gfblup_variances_all[[r]]<-var + T16_4_gfblup_prediction_all[[r]]<-pred + T16_4_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T16_4_gblup_variances_all,"T16_4_gblup_variances_all_3001_4000.rds") +saveRDS(T16_4_gblup_prediction_all,"T16_4_gblup_prediction_all_3001_4000.rds") +saveRDS(T16_4_gfblup_variances_all,"T16_4_gfblup_variances_all_3001_4000.rds") +saveRDS(T16_4_gfblup_prediction_all,"T16_4_gfblup_prediction_all_3001_4000.rds") +saveRDS(T16_4_gfblup_validate_all,"T16_4_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_4_4001_5000.R b/code/using_GO/pla/T16_4_4001_5000.R new 
file mode 100644 index 0000000000000000000000000000000000000000..af195531616501059639b34ab441a51c22ca3fff --- /dev/null +++ b/code/using_GO/pla/T16_4_4001_5000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
# ---- Phenotype inputs ----------------------------------------------------------
# load() restores the objects stored in each .Rdata file under their saved
# names (presumably including `pheno_df_pla` -- confirm).
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO-term inputs ----------------------------------------------------------
# Size notes from the original comments: the first three objects were
# annotated "7432"; the rest "7297 excluding those go terms with zero markers".
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter -------------------------------------------------------------------
# Number of entries whose marker count is positive.
n <- length(go_all_genes_number[go_all_markers_number > 0])
# Keep the entries with a positive marker count, look the same names up in
# both relationship-matrix lists, then take this script's slice (4001..5000).
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][4001:5000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][4001:5000]
+########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T16_4_gblup_variances_all=rep(list(list()),cycles) +T16_4_gblup_prediction_all=rep(list(list()),cycles) +T16_4_gfblup_variances_all=rep(list(list()),cycles) +T16_4_gfblup_prediction_all=rep(list(list()),cycles) +T16_4_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T16_4...") + y=pheno_df_pla$T16_4 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T16_4_gblup_variances_all[[r]]<-var + T16_4_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T16_4_gblup_variances_all[[r]]<-list() + T16_4_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { 
+ message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T16_4_gfblup_variances_all[[r]]<-var + T16_4_gfblup_prediction_all[[r]]<-pred + T16_4_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T16_4_gblup_variances_all,"T16_4_gblup_variances_all_4001_5000.rds") +saveRDS(T16_4_gblup_prediction_all,"T16_4_gblup_prediction_all_4001_5000.rds") +saveRDS(T16_4_gfblup_variances_all,"T16_4_gfblup_variances_all_4001_5000.rds") +saveRDS(T16_4_gfblup_prediction_all,"T16_4_gfblup_prediction_all_4001_5000.rds") +saveRDS(T16_4_gfblup_validate_all,"T16_4_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_4_5001_6000.R b/code/using_GO/pla/T16_4_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..6f9695befb00e3615817e7fe553190c34ca82ba0 --- /dev/null +++ b/code/using_GO/pla/T16_4_5001_6000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if 
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers 
# ---- Remaining GO-term inputs --------------------------------------------------
# Original note on each of these objects: "7297 excluding those go terms with
# zero markers".
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter ----------------------------------------------------------------------
# Keep the entries with a positive marker count, look the same names up in
# both relationship-matrix lists, then take this script's slice (5001..6000).
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]

# ---- Cross-validation setup ------------------------------------------------------
cycles <- 10
# Validation sets saved by the plain GBLUP run. They are re-used here instead
# of drawing new folds, so the previous clock-seeded set.seed()/sample() code
# (whose result was never used) has been removed.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One empty-list placeholder per cycle; a cycle whose fit fails keeps list().
T16_4_gblup_variances_all <- rep(list(list()), cycles)
T16_4_gblup_prediction_all <- rep(list(list()), cycles)
T16_4_gfblup_variances_all <- rep(list(list()), cycles)
T16_4_gfblup_prediction_all <- rep(list(list()), cycles)
T16_4_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design matrix do not depend on the cycle, so they
# are built once.
y <- pheno_df_pla$T16_4
# NOTE(review): the response also appears on the right-hand side; R is
# expected to drop it, leaving an intercept-only design -- confirm, and
# consider writing `y ~ 1` explicitly.
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Reshape the stored validation indices for this cycle into a matrix with
  # round(nrow(pheno_df_pla) / n_folds) rows, filled row by row.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T16_4...")

  # ---- GBLUP: one genome-wide relationship matrix ---------------------------
  # The fits are returned from tryCatch() and stored afterwards. Assignments
  # made inside an error handler are local to the handler function and would
  # never reach the result lists.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(
        y = y, X = X, GRM = list(G = G),
        validate = validate, ncores = 1
      )
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(variances = list(), prediction = list())
    }
  )
  T16_4_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
  T16_4_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]

  # ---- GFBLUP: GO-term matrix plus its complementary matrix -----------------
  gf_variances <- rep(list(list()), length(GF_filtered))
  gf_prediction <- rep(list(list()), length(GF_filtered))
  names(gf_variances) <- names(GF_filtered)
  names(gf_prediction) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    gf_fit <- tryCatch(
      list(
        variances = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        prediction = greml(
          y = y, X = X, GRM = grm_pair,
          validate = validate, ncores = 1
        )
      ),
      error = function(e) {
        message(
          "GFBLUP failed in cycle ", r, ", GO# ", k, ": ",
          conditionMessage(e)
        )
        # Both slots are reset together so a term never ends up with a
        # variance fit but no prediction.
        list(variances = list(), prediction = list())
      }
    )
    gf_variances[[k]] <- gf_fit[["variances"]]
    gf_prediction[[k]] <- gf_fit[["prediction"]]
  }
  T16_4_gfblup_variances_all[[r]] <- gf_variances
  T16_4_gfblup_prediction_all[[r]] <- gf_prediction
  T16_4_gfblup_validate_all[[r]] <- list(validate)
}

# Output file names carry this script's GO-term slice.
saveRDS(T16_4_gblup_variances_all, "T16_4_gblup_variances_all_5001_6000.rds")
saveRDS(T16_4_gblup_prediction_all, "T16_4_gblup_prediction_all_5001_6000.rds")
saveRDS(T16_4_gfblup_variances_all,"T16_4_gfblup_variances_all_5001_6000.rds")
saveRDS(T16_4_gfblup_prediction_all,"T16_4_gfblup_prediction_all_5001_6000.rds")
saveRDS(T16_4_gfblup_validate_all,"T16_4_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################
diff --git a/code/using_GO/pla/T16_4_6001_7297.R b/code/using_GO/pla/T16_4_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..83f9c5e3cca93d1ef3b69c6fc778904fb659116f --- /dev/null +++ b/code/using_GO/pla/T16_4_6001_7297.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

# Fit a model with greml() and, if that succeeds, refit it with cross-validation
# on the `validate` fold matrix.
#   y, X     : response vector and fixed-effect design matrix passed to greml()
#   GRM      : list of relationship matrices passed to greml()
#   validate : matrix of validation indices passed to greml()
#   label    : text used to identify this fit in the failure message
# Returns list(var, pred, ok). A failure is reported with message(); the step that
# failed (and any later step) is left as an empty list and `ok` stays FALSE, so a
# single failing model does not abort the batch job.
fit_greml_pair <- function(y, X, GRM, validate, label) {
  fits <- list(var = list(), pred = list(), ok = FALSE)
  tryCatch(
    {
      fits$var <- greml(y = y, X = X, GRM = GRM, ncores = 1)
      fits$pred <- greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
      fits$ok <- TRUE
    },
    error = function(e) {
      message("greml failed for ", label, ": ", conditionMessage(e))
    }
  )
  fits
}
########################################################################################

# Output files below are written relative to this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time setup (not run): install.packages(c("backports", "devtools")), then
# options(devtools.install.args=" --no-multiargs"); devtools::install_github("psoerensen/qgg")
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
# Run parameters: the only values that differ between the per-trait, per-chunk jobs.
trait <- "T16_4"   # phenotype column of pheno_df_pla
go_first <- 6001L  # first GO term (after filtering) handled by this job
go_last <- 7297L   # last GO term (after filtering) handled by this job
cycles <- 10       # number of cross-validation cycles
n_folds <- 8       # folds per cycle
###########################################################################################
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
# NOTE(review): several objects loaded here are not referenced again in this script;
# they are still loaded so the job environment is unchanged.
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
load(file.path(data_dir, "G.Rdata")) # presumably provides G -- confirm
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata")) # presumably provides pheno_df_pla -- confirm
# 7432 GO terms before filtering
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
# 7297 GO terms, excluding those with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
########################################################################################
# Keep GO terms with at least one marker, then restrict to this job's chunk.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
go_terms <- names(go_all_markers_filtered)
# Fail fast if the chunk reaches past the available GO terms; otherwise the
# out-of-range positions would become NULL matrices and every fit would fail.
stopifnot(go_first >= 1L, go_last <= length(go_terms))
chunk_terms <- go_terms[go_first:go_last]
GF_filtered <- GF[chunk_terms]
rGF_filtered <- rGF[chunk_terms]

###########################################################################################
# Validation folds are read from the saved GBLUP run instead of being sampled here,
# so no random seed is needed in this script.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
fold_size <- round(nrow(pheno_df_pla) / n_folds)

y <- pheno_df_pla[[trait]]
# Intercept-only fixed effects. The previous formula `y ~ 1*y` put the response on
# the right-hand side, which model.matrix() drops with a warning.
X <- model.matrix(y ~ 1)

gblup_variances_all <- rep(list(list()), cycles)
gblup_prediction_all <- rep(list(list()), cycles)
gfblup_variances_all <- rep(list(list()), cycles)
gfblup_prediction_all <- rep(list(list()), cycles)
gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(unlist(gblup_validate[[r]]), fold_size, byrow = TRUE)
  message(trait, "...")

  # GBLUP with the single relationship matrix G. Results are stored only when both
  # the plain fit and the cross-validated fit succeed.
  # NOTE(review): this fit does not depend on the GO chunk, so every chunk job for
  # the same trait repeats it.
  gblup <- fit_greml_pair(y, X, list(G = G), validate, paste(trait, "GBLUP cycle", r))
  if (gblup$ok) {
    gblup_variances_all[[r]] <- gblup$var
    gblup_prediction_all[[r]] <- gblup$pred
  }

  # GFBLUP: one model per GO term, using that term's GF matrix together with the
  # matching rGF matrix.
  var_fits <- rep(list(list()), length(GF_filtered))
  pred_fits <- rep(list(list()), length(GF_filtered))
  names(var_fits) <- names(GF_filtered)
  names(pred_fits) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gfblup <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      paste(trait, "GFBLUP", names(GF_filtered)[k], "cycle", r)
    )
    var_fits[[k]] <- gfblup$var
    pred_fits[[k]] <- gfblup$pred
  }
  gfblup_variances_all[[r]] <- var_fits
  gfblup_prediction_all[[r]] <- pred_fits
  gfblup_validate_all[[r]] <- list(validate)
}

# File names follow <trait>_<result>_all_<first>_<last>.rds
chunk_tag <- sprintf("%d_%d", go_first, go_last)
out_file <- function(what) sprintf("%s_%s_all_%s.rds", trait, what, chunk_tag)
saveRDS(gblup_variances_all, out_file("gblup_variances"))
saveRDS(gblup_prediction_all, out_file("gblup_prediction"))
saveRDS(gfblup_variances_all, out_file("gfblup_variances"))
saveRDS(gfblup_prediction_all, out_file("gfblup_prediction"))
saveRDS(gfblup_validate_all, out_file("gfblup_validate"))

##################################################################################################################################
diff --git a/code/using_GO/pla/T16_5_1001_2000.R b/code/using_GO/pla/T16_5_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..ff653c6ff1d4409961b8cf66b2c280bf67640012 --- /dev/null +++ b/code/using_GO/pla/T16_5_1001_2000.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg)
{
  is.element(mypkg, installed.packages()[,1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
# 7432 GO terms before filtering
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
# 7297 GO terms, excluding those with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
########################################################################################
# Run parameters: the only values that differ between the per-trait, per-chunk jobs.
trait <- "T16_5"   # phenotype column of pheno_df_pla
go_first <- 1001L  # first GO term (after filtering) handled by this job
go_last <- 2000L   # last GO term (after filtering) handled by this job
cycles <- 10       # number of cross-validation cycles
n_folds <- 8       # folds per cycle

# Fit a model with greml() and, if that succeeds, refit it with cross-validation
# on the `validate` fold matrix.
#   y, X     : response vector and fixed-effect design matrix passed to greml()
#   GRM      : list of relationship matrices passed to greml()
#   validate : matrix of validation indices passed to greml()
#   label    : text used to identify this fit in the failure message
# Returns list(var, pred, ok). A failure is reported with message(); the step that
# failed (and any later step) is left as an empty list and `ok` stays FALSE, so a
# single failing model does not abort the batch job.
fit_greml_pair <- function(y, X, GRM, validate, label) {
  fits <- list(var = list(), pred = list(), ok = FALSE)
  tryCatch(
    {
      fits$var <- greml(y = y, X = X, GRM = GRM, ncores = 1)
      fits$pred <- greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
      fits$ok <- TRUE
    },
    error = function(e) {
      message("greml failed for ", label, ": ", conditionMessage(e))
    }
  )
  fits
}

# Keep GO terms with at least one marker, then restrict to this job's chunk.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
go_terms <- names(go_all_markers_filtered)
# Fail fast if the chunk reaches past the available GO terms; otherwise the
# out-of-range positions would become NULL matrices and every fit would fail.
stopifnot(go_first >= 1L, go_last <= length(go_terms))
chunk_terms <- go_terms[go_first:go_last]
GF_filtered <- GF[chunk_terms]
rGF_filtered <- rGF[chunk_terms]

###########################################################################################
# Validation folds are read from the saved GBLUP run instead of being sampled here,
# so no random seed is needed in this script.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
fold_size <- round(nrow(pheno_df_pla) / n_folds)

y <- pheno_df_pla[[trait]]
# Intercept-only fixed effects. The previous formula `y ~ 1*y` put the response on
# the right-hand side, which model.matrix() drops with a warning.
X <- model.matrix(y ~ 1)

gblup_variances_all <- rep(list(list()), cycles)
gblup_prediction_all <- rep(list(list()), cycles)
gfblup_variances_all <- rep(list(list()), cycles)
gfblup_prediction_all <- rep(list(list()), cycles)
gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(unlist(gblup_validate[[r]]), fold_size, byrow = TRUE)
  message(trait, "...")

  # GBLUP with the single relationship matrix G. Results are stored only when both
  # the plain fit and the cross-validated fit succeed.
  # NOTE(review): this fit does not depend on the GO chunk, so every chunk job for
  # the same trait repeats it.
  gblup <- fit_greml_pair(y, X, list(G = G), validate, paste(trait, "GBLUP cycle", r))
  if (gblup$ok) {
    gblup_variances_all[[r]] <- gblup$var
    gblup_prediction_all[[r]] <- gblup$pred
  }

  # GFBLUP: one model per GO term, using that term's GF matrix together with the
  # matching rGF matrix.
  var_fits <- rep(list(list()), length(GF_filtered))
  pred_fits <- rep(list(list()), length(GF_filtered))
  names(var_fits) <- names(GF_filtered)
  names(pred_fits) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gfblup <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      paste(trait, "GFBLUP", names(GF_filtered)[k], "cycle", r)
    )
    var_fits[[k]] <- gfblup$var
    pred_fits[[k]] <- gfblup$pred
  }
  gfblup_variances_all[[r]] <- var_fits
  gfblup_prediction_all[[r]] <- pred_fits
  gfblup_validate_all[[r]] <- list(validate)
}

# File names follow <trait>_<result>_all_<first>_<last>.rds
chunk_tag <- sprintf("%d_%d", go_first, go_last)
out_file <- function(what) sprintf("%s_%s_all_%s.rds", trait, what, chunk_tag)
saveRDS(gblup_variances_all, out_file("gblup_variances"))
saveRDS(gblup_prediction_all, out_file("gblup_prediction"))
saveRDS(gfblup_variances_all, out_file("gfblup_variances"))
saveRDS(gfblup_prediction_all, out_file("gfblup_prediction"))
saveRDS(gfblup_validate_all, out_file("gfblup_validate"))

##################################################################################################################################
diff --git a/code/using_GO/pla/T16_5_1_1000.R b/code/using_GO/pla/T16_5_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..4d3c582f5cb6faa884c614cca064b3e0a74b34a2 --- /dev/null +++ b/code/using_GO/pla/T16_5_1_1000.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

# Fit a model with greml() and, if that succeeds, refit it with cross-validation
# on the `validate` fold matrix.
#   y, X     : response vector and fixed-effect design matrix passed to greml()
#   GRM      : list of relationship matrices passed to greml()
#   validate : matrix of validation indices passed to greml()
#   label    : text used to identify this fit in the failure message
# Returns list(var, pred, ok). A failure is reported with message(); the step that
# failed (and any later step) is left as an empty list and `ok` stays FALSE, so a
# single failing model does not abort the batch job.
fit_greml_pair <- function(y, X, GRM, validate, label) {
  fits <- list(var = list(), pred = list(), ok = FALSE)
  tryCatch(
    {
      fits$var <- greml(y = y, X = X, GRM = GRM, ncores = 1)
      fits$pred <- greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
      fits$ok <- TRUE
    },
    error = function(e) {
      message("greml failed for ", label, ": ", conditionMessage(e))
    }
  )
  fits
}
########################################################################################

# Output files below are written relative to this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time setup (not run): install.packages(c("backports", "devtools")), then
# options(devtools.install.args=" --no-multiargs"); devtools::install_github("psoerensen/qgg")
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
# Run parameters: the only values that differ between the per-trait, per-chunk jobs.
trait <- "T16_5"   # phenotype column of pheno_df_pla
go_first <- 1L     # first GO term (after filtering) handled by this job
go_last <- 1000L   # last GO term (after filtering) handled by this job
cycles <- 10       # number of cross-validation cycles
n_folds <- 8       # folds per cycle
###########################################################################################
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
# NOTE(review): several objects loaded here are not referenced again in this script;
# they are still loaded so the job environment is unchanged.
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
load(file.path(data_dir, "G.Rdata")) # presumably provides G -- confirm
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata")) # presumably provides pheno_df_pla -- confirm
# 7432 GO terms before filtering
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
# 7297 GO terms, excluding those with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
########################################################################################
# Keep GO terms with at least one marker, then restrict to this job's chunk.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
go_terms <- names(go_all_markers_filtered)
# Fail fast if the chunk reaches past the available GO terms; otherwise the
# out-of-range positions would become NULL matrices and every fit would fail.
stopifnot(go_first >= 1L, go_last <= length(go_terms))
chunk_terms <- go_terms[go_first:go_last]
GF_filtered <- GF[chunk_terms]
rGF_filtered <- rGF[chunk_terms]

###########################################################################################
# Validation folds are read from the saved GBLUP run instead of being sampled here,
# so no random seed is needed in this script.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
fold_size <- round(nrow(pheno_df_pla) / n_folds)

y <- pheno_df_pla[[trait]]
# Intercept-only fixed effects. The previous formula `y ~ 1*y` put the response on
# the right-hand side, which model.matrix() drops with a warning.
X <- model.matrix(y ~ 1)

gblup_variances_all <- rep(list(list()), cycles)
gblup_prediction_all <- rep(list(list()), cycles)
gfblup_variances_all <- rep(list(list()), cycles)
gfblup_prediction_all <- rep(list(list()), cycles)
gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(unlist(gblup_validate[[r]]), fold_size, byrow = TRUE)
  message(trait, "...")

  # GBLUP with the single relationship matrix G. Results are stored only when both
  # the plain fit and the cross-validated fit succeed.
  # NOTE(review): this fit does not depend on the GO chunk, so every chunk job for
  # the same trait repeats it.
  gblup <- fit_greml_pair(y, X, list(G = G), validate, paste(trait, "GBLUP cycle", r))
  if (gblup$ok) {
    gblup_variances_all[[r]] <- gblup$var
    gblup_prediction_all[[r]] <- gblup$pred
  }

  # GFBLUP: one model per GO term, using that term's GF matrix together with the
  # matching rGF matrix.
  var_fits <- rep(list(list()), length(GF_filtered))
  pred_fits <- rep(list(list()), length(GF_filtered))
  names(var_fits) <- names(GF_filtered)
  names(pred_fits) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gfblup <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      paste(trait, "GFBLUP", names(GF_filtered)[k], "cycle", r)
    )
    var_fits[[k]] <- gfblup$var
    pred_fits[[k]] <- gfblup$pred
  }
  gfblup_variances_all[[r]] <- var_fits
  gfblup_prediction_all[[r]] <- pred_fits
  gfblup_validate_all[[r]] <- list(validate)
}

# File names follow <trait>_<result>_all_<first>_<last>.rds
chunk_tag <- sprintf("%d_%d", go_first, go_last)
out_file <- function(what) sprintf("%s_%s_all_%s.rds", trait, what, chunk_tag)
saveRDS(gblup_variances_all, out_file("gblup_variances"))
saveRDS(gblup_prediction_all, out_file("gblup_prediction"))
saveRDS(gfblup_variances_all, out_file("gfblup_variances"))
saveRDS(gfblup_prediction_all, out_file("gfblup_prediction"))
saveRDS(gfblup_validate_all, out_file("gfblup_validate"))

##################################################################################################################################
diff --git a/code/using_GO/pla/T16_5_2001_3000.R b/code/using_GO/pla/T16_5_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..7c24bf9a9e6bc7a1b0dae22e47ea74a9e8ffc7e1 --- /dev/null +++ b/code/using_GO/pla/T16_5_2001_3000.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

# Fit a model with greml() and, if that succeeds, refit it with cross-validation
# on the `validate` fold matrix.
#   y, X     : response vector and fixed-effect design matrix passed to greml()
#   GRM      : list of relationship matrices passed to greml()
#   validate : matrix of validation indices passed to greml()
#   label    : text used to identify this fit in the failure message
# Returns list(var, pred, ok). A failure is reported with message(); the step that
# failed (and any later step) is left as an empty list and `ok` stays FALSE, so a
# single failing model does not abort the batch job.
fit_greml_pair <- function(y, X, GRM, validate, label) {
  fits <- list(var = list(), pred = list(), ok = FALSE)
  tryCatch(
    {
      fits$var <- greml(y = y, X = X, GRM = GRM, ncores = 1)
      fits$pred <- greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
      fits$ok <- TRUE
    },
    error = function(e) {
      message("greml failed for ", label, ": ", conditionMessage(e))
    }
  )
  fits
}
########################################################################################

# Output files below are written relative to this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time setup (not run): install.packages(c("backports", "devtools")), then
# options(devtools.install.args=" --no-multiargs"); devtools::install_github("psoerensen/qgg")
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
# Run parameters: the only values that differ between the per-trait, per-chunk jobs.
trait <- "T16_5"   # phenotype column of pheno_df_pla
go_first <- 2001L  # first GO term (after filtering) handled by this job
go_last <- 3000L   # last GO term (after filtering) handled by this job
cycles <- 10       # number of cross-validation cycles
n_folds <- 8       # folds per cycle
###########################################################################################
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
# NOTE(review): several objects loaded here are not referenced again in this script;
# they are still loaded so the job environment is unchanged.
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
load(file.path(data_dir, "G.Rdata")) # presumably provides G -- confirm
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata")) # presumably provides pheno_df_pla -- confirm
# 7432 GO terms before filtering
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
# 7297 GO terms, excluding those with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
########################################################################################
# Keep GO terms with at least one marker, then restrict to this job's chunk.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
go_terms <- names(go_all_markers_filtered)
# Fail fast if the chunk reaches past the available GO terms; otherwise the
# out-of-range positions would become NULL matrices and every fit would fail.
stopifnot(go_first >= 1L, go_last <= length(go_terms))
chunk_terms <- go_terms[go_first:go_last]
GF_filtered <- GF[chunk_terms]
rGF_filtered <- rGF[chunk_terms]

###########################################################################################
# Validation folds are read from the saved GBLUP run instead of being sampled here,
# so no random seed is needed in this script.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
fold_size <- round(nrow(pheno_df_pla) / n_folds)

y <- pheno_df_pla[[trait]]
# Intercept-only fixed effects. The previous formula `y ~ 1*y` put the response on
# the right-hand side, which model.matrix() drops with a warning.
X <- model.matrix(y ~ 1)

gblup_variances_all <- rep(list(list()), cycles)
gblup_prediction_all <- rep(list(list()), cycles)
gfblup_variances_all <- rep(list(list()), cycles)
gfblup_prediction_all <- rep(list(list()), cycles)
gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(unlist(gblup_validate[[r]]), fold_size, byrow = TRUE)
  message(trait, "...")

  # GBLUP with the single relationship matrix G. Results are stored only when both
  # the plain fit and the cross-validated fit succeed.
  # NOTE(review): this fit does not depend on the GO chunk, so every chunk job for
  # the same trait repeats it.
  gblup <- fit_greml_pair(y, X, list(G = G), validate, paste(trait, "GBLUP cycle", r))
  if (gblup$ok) {
    gblup_variances_all[[r]] <- gblup$var
    gblup_prediction_all[[r]] <- gblup$pred
  }

  # GFBLUP: one model per GO term, using that term's GF matrix together with the
  # matching rGF matrix.
  var_fits <- rep(list(list()), length(GF_filtered))
  pred_fits <- rep(list(list()), length(GF_filtered))
  names(var_fits) <- names(GF_filtered)
  names(pred_fits) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gfblup <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      paste(trait, "GFBLUP", names(GF_filtered)[k], "cycle", r)
    )
    var_fits[[k]] <- gfblup$var
    pred_fits[[k]] <- gfblup$pred
  }
  gfblup_variances_all[[r]] <- var_fits
  gfblup_prediction_all[[r]] <- pred_fits
  gfblup_validate_all[[r]] <- list(validate)
}

# File names follow <trait>_<result>_all_<first>_<last>.rds
chunk_tag <- sprintf("%d_%d", go_first, go_last)
out_file <- function(what) sprintf("%s_%s_all_%s.rds", trait, what, chunk_tag)
saveRDS(gblup_variances_all, out_file("gblup_variances"))
saveRDS(gblup_prediction_all, out_file("gblup_prediction"))
saveRDS(gfblup_variances_all, out_file("gfblup_variances"))
saveRDS(gfblup_prediction_all, out_file("gfblup_prediction"))
saveRDS(gfblup_validate_all, out_file("gfblup_validate"))

##################################################################################################################################
diff --git a/code/using_GO/pla/T16_5_3001_4000.R b/code/using_GO/pla/T16_5_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..ca125a1bb01bdd64bf60dca1466a54eed15ca56c --- /dev/null +++ b/code/using_GO/pla/T16_5_3001_4000.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg)
{
  is.element(mypkg, installed.packages()[,1])
}
########################################################################################

# Output files below are written relative to this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time setup (not run): install.packages(c("backports", "devtools")), then
# options(devtools.install.args=" --no-multiargs"); devtools::install_github("psoerensen/qgg")
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
# Run parameters: the only values that differ between the per-trait, per-chunk jobs.
trait <- "T16_5"   # phenotype column of pheno_df_pla
go_first <- 3001L  # first GO term (after filtering) handled by this job
go_last <- 4000L   # last GO term (after filtering) handled by this job
cycles <- 10       # number of cross-validation cycles
n_folds <- 8       # folds per cycle

# Fit a model with greml() and, if that succeeds, refit it with cross-validation
# on the `validate` fold matrix.
#   y, X     : response vector and fixed-effect design matrix passed to greml()
#   GRM      : list of relationship matrices passed to greml()
#   validate : matrix of validation indices passed to greml()
#   label    : text used to identify this fit in the failure message
# Returns list(var, pred, ok). A failure is reported with message(); the step that
# failed (and any later step) is left as an empty list and `ok` stays FALSE, so a
# single failing model does not abort the batch job.
fit_greml_pair <- function(y, X, GRM, validate, label) {
  fits <- list(var = list(), pred = list(), ok = FALSE)
  tryCatch(
    {
      fits$var <- greml(y = y, X = X, GRM = GRM, ncores = 1)
      fits$pred <- greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
      fits$ok <- TRUE
    },
    error = function(e) {
      message("greml failed for ", label, ": ", conditionMessage(e))
    }
  )
  fits
}
###########################################################################################
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
# NOTE(review): several objects loaded here are not referenced again in this script;
# they are still loaded so the job environment is unchanged.
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
load(file.path(data_dir, "G.Rdata")) # presumably provides G -- confirm
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata")) # presumably provides pheno_df_pla -- confirm
# 7432 GO terms before filtering
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
# 7297 GO terms, excluding those with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
########################################################################################
# Keep GO terms with at least one marker, then restrict to this job's chunk.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
go_terms <- names(go_all_markers_filtered)
# Fail fast if the chunk reaches past the available GO terms; otherwise the
# out-of-range positions would become NULL matrices and every fit would fail.
stopifnot(go_first >= 1L, go_last <= length(go_terms))
chunk_terms <- go_terms[go_first:go_last]
GF_filtered <- GF[chunk_terms]
rGF_filtered <- rGF[chunk_terms]

###########################################################################################
# Validation folds are read from the saved GBLUP run instead of being sampled here,
# so no random seed is needed in this script.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
fold_size <- round(nrow(pheno_df_pla) / n_folds)

y <- pheno_df_pla[[trait]]
# Intercept-only fixed effects. The previous formula `y ~ 1*y` put the response on
# the right-hand side, which model.matrix() drops with a warning.
X <- model.matrix(y ~ 1)

gblup_variances_all <- rep(list(list()), cycles)
gblup_prediction_all <- rep(list(list()), cycles)
gfblup_variances_all <- rep(list(list()), cycles)
gfblup_prediction_all <- rep(list(list()), cycles)
gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(unlist(gblup_validate[[r]]), fold_size, byrow = TRUE)
  message(trait, "...")

  # GBLUP with the single relationship matrix G. Results are stored only when both
  # the plain fit and the cross-validated fit succeed.
  # NOTE(review): this fit does not depend on the GO chunk, so every chunk job for
  # the same trait repeats it.
  gblup <- fit_greml_pair(y, X, list(G = G), validate, paste(trait, "GBLUP cycle", r))
  if (gblup$ok) {
    gblup_variances_all[[r]] <- gblup$var
    gblup_prediction_all[[r]] <- gblup$pred
  }

  # GFBLUP: one model per GO term, using that term's GF matrix together with the
  # matching rGF matrix.
  var_fits <- rep(list(list()), length(GF_filtered))
  pred_fits <- rep(list(list()), length(GF_filtered))
  names(var_fits) <- names(GF_filtered)
  names(pred_fits) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gfblup <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      paste(trait, "GFBLUP", names(GF_filtered)[k], "cycle", r)
    )
    var_fits[[k]] <- gfblup$var
    pred_fits[[k]] <- gfblup$pred
  }
  gfblup_variances_all[[r]] <- var_fits
  gfblup_prediction_all[[r]] <- pred_fits
  gfblup_validate_all[[r]] <- list(validate)
}

# File names follow <trait>_<result>_all_<first>_<last>.rds
chunk_tag <- sprintf("%d_%d", go_first, go_last)
out_file <- function(what) sprintf("%s_%s_all_%s.rds", trait, what, chunk_tag)
saveRDS(gblup_variances_all, out_file("gblup_variances"))
saveRDS(gblup_prediction_all, out_file("gblup_prediction"))
saveRDS(gfblup_variances_all, out_file("gfblup_variances"))
saveRDS(gfblup_prediction_all, out_file("gfblup_prediction"))
saveRDS(gfblup_validate_all, out_file("gfblup_validate"))

##################################################################################################################################
diff --git a/code/using_GO/pla/T16_5_4001_5000.R b/code/using_GO/pla/T16_5_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..51fb1d55ae64187f984d1ccb6a25e02e0e97ff9a --- /dev/null +++ b/code/using_GO/pla/T16_5_4001_5000.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg)
{
  is.element(mypkg, installed.packages()[,1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with 
zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T16_5_gblup_variances_all=rep(list(list()),cycles) +T16_5_gblup_prediction_all=rep(list(list()),cycles) +T16_5_gfblup_variances_all=rep(list(list()),cycles) +T16_5_gfblup_prediction_all=rep(list(list()),cycles) +T16_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T16_5...") + y=pheno_df_pla$T16_5 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = 
list(G=G), validate = validate,ncores=1) + T16_5_gblup_variances_all[[r]]<-var + T16_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T16_5_gblup_variances_all[[r]]<-list() + T16_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T16_5_gfblup_variances_all[[r]]<-var + T16_5_gfblup_prediction_all[[r]]<-pred + T16_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T16_5_gblup_variances_all,"T16_5_gblup_variances_all_4001_5000.rds") +saveRDS(T16_5_gblup_prediction_all,"T16_5_gblup_prediction_all_4001_5000.rds") +saveRDS(T16_5_gfblup_variances_all,"T16_5_gfblup_variances_all_4001_5000.rds") +saveRDS(T16_5_gfblup_prediction_all,"T16_5_gfblup_prediction_all_4001_5000.rds") +saveRDS(T16_5_gfblup_validate_all,"T16_5_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_5_5001_6000.R b/code/using_GO/pla/T16_5_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..8c07cb013bc18e6e1b55ecfc1fc85369eca68ee7 --- /dev/null +++ b/code/using_GO/pla/T16_5_5001_6000.R @@ -0,0 +1,131 @@ 
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+#  install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+#  install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+#  options(devtools.install.args=" --no-multiargs")
+#  devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+# Keep GO terms that have at least one marker, then take this script's slice of them.
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[5001:6000]
+rGF_filtered<-rGF_filtered[5001:6000]
+
+###########################################################################################
+cycles <- 10 # number of cross-validation cycles; gblup_validate[[r]] is read once per cycle
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T16_5_gblup_variances_all <- rep(list(list()), cycles)
+T16_5_gblup_prediction_all <- rep(list(list()), cycles)
+T16_5_gfblup_variances_all <- rep(list(list()), cycles)
+T16_5_gfblup_prediction_all <- rep(list(list()), cycles)
+T16_5_gfblup_validate_all <- rep(list(list()), cycles)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # Validation sets come from the stored gblup_validate[[r]] loaded above, not from a
+  # fresh random split: the former time-seeded sample() filled folds_index, which was
+  # never read, so it and the empty fold loop are dropped.
+  # The matrix has round(nrow(pheno_df_pla)/n_folds) rows and is filled row-wise.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T16_5...")
+  y <- pheno_df_pla$T16_5
+  # NOTE(review): the formula has the response on its right-hand side; an
+  # intercept-only design (y ~ 1) looks intended -- confirm before changing it.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits are returned from tryCatch() and stored afterwards; an assignment made
+  # inside the error handler only changes a copy local to that handler function.
+  # On failure the error is reported and both slots are set to an empty list().
+  gblup_fit <- tryCatch(list(
+    var = greml(y = y, X = X, GRM = list(G=G), ncores=1),
+    pred = greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1)
+  ), error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+  T16_5_gblup_variances_all[[r]] <- gblup_fit$var
+  T16_5_gblup_prediction_all[[r]] <- gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <- rep(list(list()), length(GF_filtered))
+  pred <- rep(list(list()), length(GF_filtered))
+  names(var) <- names(GF_filtered)
+  names(pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grms <- c(GF_filtered[k], rGF_filtered[k]) # k-th GF entry together with the k-th rGF entry
+    gfblup_fit <- tryCatch(list(
+      var = greml(y = y, X = X, GRM = grms, ncores=1),
+      pred = greml(y = y, X = X, GRM = grms, validate = validate, ncores=1)
+    ), error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    })
+    var[[k]] <- gfblup_fit$var
+    pred[[k]] <- gfblup_fit$pred
+  }
+  T16_5_gfblup_variances_all[[r]] <- var
+  T16_5_gfblup_prediction_all[[r]] <- pred
+  T16_5_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T16_5_gblup_variances_all,"T16_5_gblup_variances_all_5001_6000.rds")
+saveRDS(T16_5_gblup_prediction_all,"T16_5_gblup_prediction_all_5001_6000.rds")
+saveRDS(T16_5_gfblup_variances_all,"T16_5_gfblup_variances_all_5001_6000.rds")
+saveRDS(T16_5_gfblup_prediction_all,"T16_5_gfblup_prediction_all_5001_6000.rds")
+saveRDS(T16_5_gfblup_validate_all,"T16_5_gfblup_validate_all_5001_6000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T16_5_6001_7297.R b/code/using_GO/pla/T16_5_6001_7297.R
new file mode 100644
index 0000000000000000000000000000000000000000..5f8f3a12f2c305f083a4d85ab0b75e85de4febfa
--- /dev/null
+++ b/code/using_GO/pla/T16_5_6001_7297.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+#  install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+#  install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+#  options(devtools.install.args=" --no-multiargs")
+#
devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+# Keep GO terms that have at least one marker, then take this script's slice of them.
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[6001:7297]
+rGF_filtered<-rGF_filtered[6001:7297]
+
+###########################################################################################
+cycles <- 10 # number of cross-validation cycles; gblup_validate[[r]] is read once per cycle
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T16_5_gblup_variances_all <- rep(list(list()), cycles)
+T16_5_gblup_prediction_all <- rep(list(list()), cycles)
+T16_5_gfblup_variances_all <- rep(list(list()), cycles)
+T16_5_gfblup_prediction_all <- rep(list(list()), cycles)
+T16_5_gfblup_validate_all <- rep(list(list()), cycles)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # Validation sets come from the stored gblup_validate[[r]] loaded above, not from a
+  # fresh random split: the former time-seeded sample() filled folds_index, which was
+  # never read, so it and the empty fold loop are dropped.
+  # The matrix has round(nrow(pheno_df_pla)/n_folds) rows and is filled row-wise.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T16_5...")
+  y <- pheno_df_pla$T16_5
+  # NOTE(review): the formula has the response on its right-hand side; an
+  # intercept-only design (y ~ 1) looks intended -- confirm before changing it.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits are returned from tryCatch() and stored afterwards; an assignment made
+  # inside the error handler only changes a copy local to that handler function.
+  # On failure the error is reported and both slots are set to an empty list().
+  gblup_fit <- tryCatch(list(
+    var = greml(y = y, X = X, GRM = list(G=G), ncores=1),
+    pred = greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1)
+  ), error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+  T16_5_gblup_variances_all[[r]] <- gblup_fit$var
+  T16_5_gblup_prediction_all[[r]] <- gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <- rep(list(list()), length(GF_filtered))
+  pred <- rep(list(list()), length(GF_filtered))
+  names(var) <- names(GF_filtered)
+  names(pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grms <- c(GF_filtered[k], rGF_filtered[k]) # k-th GF entry together with the k-th rGF entry
+    gfblup_fit <- tryCatch(list(
+      var = greml(y = y, X = X, GRM = grms, ncores=1),
+      pred = greml(y = y, X = X, GRM = grms, validate = validate, ncores=1)
+    ), error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    })
+    var[[k]] <- gfblup_fit$var
+    pred[[k]] <- gfblup_fit$pred
+  }
+  T16_5_gfblup_variances_all[[r]] <- var
+  T16_5_gfblup_prediction_all[[r]] <- pred
+  T16_5_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T16_5_gblup_variances_all,"T16_5_gblup_variances_all_6001_7297.rds")
+saveRDS(T16_5_gblup_prediction_all,"T16_5_gblup_prediction_all_6001_7297.rds")
+saveRDS(T16_5_gfblup_variances_all,"T16_5_gfblup_variances_all_6001_7297.rds")
+saveRDS(T16_5_gfblup_prediction_all,"T16_5_gfblup_prediction_all_6001_7297.rds")
+saveRDS(T16_5_gfblup_validate_all,"T16_5_gfblup_validate_all_6001_7297.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T16_6_1001_2000.R b/code/using_GO/pla/T16_6_1001_2000.R
new file mode 100644
index 0000000000000000000000000000000000000000..fb207ad446619c0e4547fc3a4c2de1e8069047ed
--- /dev/null
+++ b/code/using_GO/pla/T16_6_1001_2000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+#  install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+#  install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+#  options(devtools.install.args=" --no-multiargs")
+#  devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+# Keep GO terms that have at least one marker, then take this script's slice of them.
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[1001:2000]
+rGF_filtered<-rGF_filtered[1001:2000]
+
+###########################################################################################
+cycles <- 10 # number of cross-validation cycles; gblup_validate[[r]] is read once per cycle
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T16_6_gblup_variances_all <- rep(list(list()), cycles)
+T16_6_gblup_prediction_all <- rep(list(list()), cycles)
+T16_6_gfblup_variances_all <- rep(list(list()), cycles)
+T16_6_gfblup_prediction_all <- rep(list(list()), cycles)
+T16_6_gfblup_validate_all <- rep(list(list()), cycles)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # Validation sets come from the stored gblup_validate[[r]] loaded above, not from a
+  # fresh random split: the former time-seeded sample() filled folds_index, which was
+  # never read, so it and the empty fold loop are dropped.
+  # The matrix has round(nrow(pheno_df_pla)/n_folds) rows and is filled row-wise.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T16_6...")
+  y <- pheno_df_pla$T16_6
+  # NOTE(review): the formula has the response on its right-hand side; an
+  # intercept-only design (y ~ 1) looks intended -- confirm before changing it.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits are returned from tryCatch() and stored afterwards; an assignment made
+  # inside the error handler only changes a copy local to that handler function.
+  # On failure the error is reported and both slots are set to an empty list().
+  gblup_fit <- tryCatch(list(
+    var = greml(y = y, X = X, GRM = list(G=G), ncores=1),
+    pred = greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1)
+  ), error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+  T16_6_gblup_variances_all[[r]] <- gblup_fit$var
+  T16_6_gblup_prediction_all[[r]] <- gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <- rep(list(list()), length(GF_filtered))
+  pred <- rep(list(list()), length(GF_filtered))
+  names(var) <- names(GF_filtered)
+  names(pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grms <- c(GF_filtered[k], rGF_filtered[k]) # k-th GF entry together with the k-th rGF entry
+    gfblup_fit <- tryCatch(list(
+      var = greml(y = y, X = X, GRM = grms, ncores=1),
+      pred = greml(y = y, X = X, GRM = grms, validate = validate, ncores=1)
+    ), error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    })
+    var[[k]] <- gfblup_fit$var
+    pred[[k]] <- gfblup_fit$pred
+  }
+  T16_6_gfblup_variances_all[[r]] <- var
+  T16_6_gfblup_prediction_all[[r]] <- pred
+  T16_6_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T16_6_gblup_variances_all,"T16_6_gblup_variances_all_1001_2000.rds")
+saveRDS(T16_6_gblup_prediction_all,"T16_6_gblup_prediction_all_1001_2000.rds")
+saveRDS(T16_6_gfblup_variances_all,"T16_6_gfblup_variances_all_1001_2000.rds")
+saveRDS(T16_6_gfblup_prediction_all,"T16_6_gfblup_prediction_all_1001_2000.rds")
+saveRDS(T16_6_gfblup_validate_all,"T16_6_gfblup_validate_all_1001_2000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T16_6_1_1000.R b/code/using_GO/pla/T16_6_1_1000.R
new file mode 100644
index 0000000000000000000000000000000000000000..dcabdb75a5d8af5a22a71cfec3e52771504eeda2
--- /dev/null
+++ b/code/using_GO/pla/T16_6_1_1000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+#
Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+#  install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+#  install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+#  options(devtools.install.args=" --no-multiargs")
+#  devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+# Keep GO terms that have at least one marker, then take this script's slice of them.
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[1:1000]
+rGF_filtered<-rGF_filtered[1:1000]
+
+###########################################################################################
+cycles <- 10 # number of cross-validation cycles; gblup_validate[[r]] is read once per cycle
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T16_6_gblup_variances_all <- rep(list(list()), cycles)
+T16_6_gblup_prediction_all <- rep(list(list()), cycles)
+T16_6_gfblup_variances_all <- rep(list(list()), cycles)
+T16_6_gfblup_prediction_all <- rep(list(list()), cycles)
+T16_6_gfblup_validate_all <- rep(list(list()), cycles)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # Validation sets come from the stored gblup_validate[[r]] loaded above, not from a
+  # fresh random split: the former time-seeded sample() filled folds_index, which was
+  # never read, so it and the empty fold loop are dropped.
+  # The matrix has round(nrow(pheno_df_pla)/n_folds) rows and is filled row-wise.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T16_6...")
+  y <- pheno_df_pla$T16_6
+  # NOTE(review): the formula has the response on its right-hand side; an
+  # intercept-only design (y ~ 1) looks intended -- confirm before changing it.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits are returned from tryCatch() and stored afterwards; an assignment made
+  # inside the error handler only changes a copy local to that handler function.
+  # On failure the error is reported and both slots are set to an empty list().
+  gblup_fit <- tryCatch(list(
+    var = greml(y = y, X = X, GRM = list(G=G), ncores=1),
+    pred = greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1)
+  ), error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+  T16_6_gblup_variances_all[[r]] <- gblup_fit$var
+  T16_6_gblup_prediction_all[[r]] <- gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <- rep(list(list()), length(GF_filtered))
+  pred <- rep(list(list()), length(GF_filtered))
+  names(var) <- names(GF_filtered)
+  names(pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grms <- c(GF_filtered[k], rGF_filtered[k]) # k-th GF entry together with the k-th rGF entry
+    gfblup_fit <- tryCatch(list(
+      var = greml(y = y, X = X, GRM = grms, ncores=1),
+      pred = greml(y = y, X = X, GRM = grms, validate = validate, ncores=1)
+    ), error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    })
+    var[[k]] <- gfblup_fit$var
+    pred[[k]] <- gfblup_fit$pred
+  }
+  T16_6_gfblup_variances_all[[r]] <- var
+  T16_6_gfblup_prediction_all[[r]] <- pred
+  T16_6_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T16_6_gblup_variances_all,"T16_6_gblup_variances_all_1_1000.rds")
+saveRDS(T16_6_gblup_prediction_all,"T16_6_gblup_prediction_all_1_1000.rds")
+saveRDS(T16_6_gfblup_variances_all,"T16_6_gfblup_variances_all_1_1000.rds")
+saveRDS(T16_6_gfblup_prediction_all,"T16_6_gfblup_prediction_all_1_1000.rds")
+saveRDS(T16_6_gfblup_validate_all,"T16_6_gfblup_validate_all_1_1000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T16_6_2001_3000.R b/code/using_GO/pla/T16_6_2001_3000.R
new file mode 100644
index 0000000000000000000000000000000000000000..8fc0e62ba7895daf322c325203196eccf2572da7
--- /dev/null
+++ b/code/using_GO/pla/T16_6_2001_3000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+#  install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+#  install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+#  options(devtools.install.args=" --no-multiargs")
+#  devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
###########################################################################################
# Read the shared inputs. readRDS() results are bound explicitly; each load() call
# restores whatever objects were saved in its .Rdata file.
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# Author's note on the next three objects: 7432
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# Author's note on the remaining objects: 7297 excluding those go terms with zero markers
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
# Author's note on these objects: 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# filter: keep GO terms with at least one marker, then take this job's slice of terms
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][2001:3000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][2001:3000]

###########################################################################################
cycles <- 10
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

T16_6_gblup_variances_all <- rep(list(list()), cycles)
T16_6_gblup_prediction_all <- rep(list(list()), cycles)
T16_6_gfblup_variances_all <- rep(list(list()), cycles)
T16_6_gfblup_prediction_all <- rep(list(list()), cycles)
T16_6_gfblup_validate_all <- rep(list(list()), cycles)

# The phenotype and the fixed-effect design are the same in every cycle, so build
# them once. `y ~ 1` is the intercept-only design that the former `y ~ 1*y` reduced
# to once model.matrix() dropped the response from the right-hand side (with a warning).
y <- pheno_df_pla$T16_6
X <- model.matrix(y ~ 1)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Validation folds are rebuilt from the stored GBLUP folds for this cycle. The old
  # time-based set.seed()/sample() draw and the empty fold loop never fed into
  # `validate`, so they were removed.
  # NOTE(review): byrow = TRUE reproduces the stored folds only if they were
  # flattened row-wise; confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T16_6...")

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Fit inside tryCatch() and store the results afterwards: assignments made inside
  # an error handler only change the handler's local copies, never the result lists.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T16_6_gblup_variances_all[[r]] <- gblup_fit$variances
    T16_6_gblup_prediction_all[[r]] <- gblup_fit$prediction
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # One placeholder per GO term, named after the term.
  gf_variances <- rep(list(list()), length(GF_filtered))
  names(gf_variances) <- names(GF_filtered)
  gf_prediction <- gf_variances
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the GO-term one and its counterpart in rGF.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    gf_fit <- tryCatch(
      list(
        variances = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        prediction = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    # A failed term keeps its empty-list placeholder in BOTH result lists.
    if (!is.null(gf_fit)) {
      gf_variances[[k]] <- gf_fit$variances
      gf_prediction[[k]] <- gf_fit$prediction
    }
  }
  T16_6_gfblup_variances_all[[r]] <- gf_variances
  T16_6_gfblup_prediction_all[[r]] <- gf_prediction
  T16_6_gfblup_validate_all[[r]] <- list(validate)
}

# Results are written relative to the working directory set at the top of the script.
saveRDS(T16_6_gblup_variances_all, "T16_6_gblup_variances_all_2001_3000.rds")
saveRDS(T16_6_gblup_prediction_all, "T16_6_gblup_prediction_all_2001_3000.rds")
saveRDS(T16_6_gfblup_variances_all, "T16_6_gfblup_variances_all_2001_3000.rds")
saveRDS(T16_6_gfblup_prediction_all, "T16_6_gfblup_prediction_all_2001_3000.rds")
saveRDS(T16_6_gfblup_validate_all, "T16_6_gfblup_validate_all_2001_3000.rds")

##################################################################################################################################
# Patch header of the next script, kept verbatim:
# diff --git a/code/using_GO/pla/T16_6_3001_4000.R b/code/using_GO/pla/T16_6_3001_4000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..38e7f577dc79d6ddac436a3b88c60ce5907cf1d9
# --- /dev/null
# +++ b/code/using_GO/pla/T16_6_3001_4000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-off dependency installation, kept for reference:
# if (!is.installed("backports")) {
#   install.packages("backports")
# }
# if (!is.installed("devtools")) {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
# Remaining inputs: each load() restores the objects saved in its .Rdata file.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# Author's note on the next three objects: 7432
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# Author's note on the remaining objects: 7297 excluding those go terms with zero markers
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# filter: keep GO terms with at least one marker, then take this job's slice of terms
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][3001:4000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][3001:4000]

###########################################################################################
cycles <- 10
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

T16_6_gblup_variances_all <- rep(list(list()), cycles)
T16_6_gblup_prediction_all <- rep(list(list()), cycles)
T16_6_gfblup_variances_all <- rep(list(list()), cycles)
T16_6_gfblup_prediction_all <- rep(list(list()), cycles)
T16_6_gfblup_validate_all <- rep(list(list()), cycles)

# The phenotype and the fixed-effect design are the same in every cycle, so build
# them once. `y ~ 1` is the intercept-only design that the former `y ~ 1*y` reduced
# to once model.matrix() dropped the response from the right-hand side (with a warning).
y <- pheno_df_pla$T16_6
X <- model.matrix(y ~ 1)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Validation folds are rebuilt from the stored GBLUP folds for this cycle. The old
  # time-based set.seed()/sample() draw and the empty fold loop never fed into
  # `validate`, so they were removed.
  # NOTE(review): byrow = TRUE reproduces the stored folds only if they were
  # flattened row-wise; confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T16_6...")

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Fit inside tryCatch() and store the results afterwards: assignments made inside
  # an error handler only change the handler's local copies, never the result lists.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T16_6_gblup_variances_all[[r]] <- gblup_fit$variances
    T16_6_gblup_prediction_all[[r]] <- gblup_fit$prediction
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # One placeholder per GO term, named after the term.
  gf_variances <- rep(list(list()), length(GF_filtered))
  names(gf_variances) <- names(GF_filtered)
  gf_prediction <- gf_variances
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the GO-term one and its counterpart in rGF.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    gf_fit <- tryCatch(
      list(
        variances = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        prediction = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    # A failed term keeps its empty-list placeholder in BOTH result lists.
    if (!is.null(gf_fit)) {
      gf_variances[[k]] <- gf_fit$variances
      gf_prediction[[k]] <- gf_fit$prediction
    }
  }
  T16_6_gfblup_variances_all[[r]] <- gf_variances
  T16_6_gfblup_prediction_all[[r]] <- gf_prediction
  T16_6_gfblup_validate_all[[r]] <- list(validate)
}

# Results are written relative to the working directory set at the top of the script.
saveRDS(T16_6_gblup_variances_all, "T16_6_gblup_variances_all_3001_4000.rds")
saveRDS(T16_6_gblup_prediction_all, "T16_6_gblup_prediction_all_3001_4000.rds")
saveRDS(T16_6_gfblup_variances_all, "T16_6_gfblup_variances_all_3001_4000.rds")
saveRDS(T16_6_gfblup_prediction_all, "T16_6_gfblup_prediction_all_3001_4000.rds")
saveRDS(T16_6_gfblup_validate_all, "T16_6_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
# Patch header of the next script, kept verbatim:
# diff --git a/code/using_GO/pla/T16_6_4001_5000.R b/code/using_GO/pla/T16_6_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..7dd77a07df88fe777687c4d083bec2c7cb239c88
# --- /dev/null
# +++ b/code/using_GO/pla/T16_6_4001_5000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-off dependency installation, kept for reference:
# if (!is.installed("backports")) {
#   install.packages("backports")
# }
# if (!is.installed("devtools")) {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# Author's note on the next three objects: 7432
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# Author's note on this object: 7297 excluding those go terms with zero markers
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
# Author's note on these objects: 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# filter: keep GO terms with at least one marker, then take this job's slice of terms
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][4001:5000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][4001:5000]

###########################################################################################
cycles <- 10
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

T16_6_gblup_variances_all <- rep(list(list()), cycles)
T16_6_gblup_prediction_all <- rep(list(list()), cycles)
T16_6_gfblup_variances_all <- rep(list(list()), cycles)
T16_6_gfblup_prediction_all <- rep(list(list()), cycles)
T16_6_gfblup_validate_all <- rep(list(list()), cycles)

# The phenotype and the fixed-effect design are the same in every cycle, so build
# them once. `y ~ 1` is the intercept-only design that the former `y ~ 1*y` reduced
# to once model.matrix() dropped the response from the right-hand side (with a warning).
y <- pheno_df_pla$T16_6
X <- model.matrix(y ~ 1)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Validation folds are rebuilt from the stored GBLUP folds for this cycle. The old
  # time-based set.seed()/sample() draw and the empty fold loop never fed into
  # `validate`, so they were removed.
  # NOTE(review): byrow = TRUE reproduces the stored folds only if they were
  # flattened row-wise; confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T16_6...")

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Fit inside tryCatch() and store the results afterwards: assignments made inside
  # an error handler only change the handler's local copies, never the result lists.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T16_6_gblup_variances_all[[r]] <- gblup_fit$variances
    T16_6_gblup_prediction_all[[r]] <- gblup_fit$prediction
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # One placeholder per GO term, named after the term.
  gf_variances <- rep(list(list()), length(GF_filtered))
  names(gf_variances) <- names(GF_filtered)
  gf_prediction <- gf_variances
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the GO-term one and its counterpart in rGF.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    gf_fit <- tryCatch(
      list(
        variances = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        prediction = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    # A failed term keeps its empty-list placeholder in BOTH result lists.
    if (!is.null(gf_fit)) {
      gf_variances[[k]] <- gf_fit$variances
      gf_prediction[[k]] <- gf_fit$prediction
    }
  }
  T16_6_gfblup_variances_all[[r]] <- gf_variances
  T16_6_gfblup_prediction_all[[r]] <- gf_prediction
  T16_6_gfblup_validate_all[[r]] <- list(validate)
}

# Results are written relative to the working directory set at the top of the script.
saveRDS(T16_6_gblup_variances_all, "T16_6_gblup_variances_all_4001_5000.rds")
saveRDS(T16_6_gblup_prediction_all, "T16_6_gblup_prediction_all_4001_5000.rds")
# Remaining GFBLUP outputs of the 4001-5000 slice, written to the working directory.
saveRDS(T16_6_gfblup_variances_all, file = "T16_6_gfblup_variances_all_4001_5000.rds")
saveRDS(T16_6_gfblup_prediction_all, file = "T16_6_gfblup_prediction_all_4001_5000.rds")
saveRDS(T16_6_gfblup_validate_all, file = "T16_6_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################
# Patch header of the next script, kept verbatim:
# diff --git a/code/using_GO/pla/T16_6_5001_6000.R b/code/using_GO/pla/T16_6_5001_6000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..0d1b710b021d949f6020519c206dce6ecc33656a
# --- /dev/null
# +++ b/code/using_GO/pla/T16_6_5001_6000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-off dependency installation, kept for reference:
# if (!is.installed("backports")) {
#   install.packages("backports")
# }
# if (!is.installed("devtools")) {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
# Remaining inputs. readRDS() results are bound explicitly; each load() call
# restores whatever objects were saved in its .Rdata file.
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# Author's note on the next three objects: 7432
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# Author's note on the remaining objects: 7297 excluding those go terms with zero markers
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
# Author's note on the rGF object read just before this point:
# 7297 excluding those go terms with zero markers
########################################################################################
# filter: keep GO terms with at least one marker, then take this job's slice of terms
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][5001:6000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][5001:6000]

###########################################################################################
cycles <- 10
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

T16_6_gblup_variances_all <- rep(list(list()), cycles)
T16_6_gblup_prediction_all <- rep(list(list()), cycles)
T16_6_gfblup_variances_all <- rep(list(list()), cycles)
T16_6_gfblup_prediction_all <- rep(list(list()), cycles)
T16_6_gfblup_validate_all <- rep(list(list()), cycles)

# The phenotype and the fixed-effect design are the same in every cycle, so build
# them once. `y ~ 1` is the intercept-only design that the former `y ~ 1*y` reduced
# to once model.matrix() dropped the response from the right-hand side (with a warning).
y <- pheno_df_pla$T16_6
X <- model.matrix(y ~ 1)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Validation folds are rebuilt from the stored GBLUP folds for this cycle. The old
  # time-based set.seed()/sample() draw and the empty fold loop never fed into
  # `validate`, so they were removed.
  # NOTE(review): byrow = TRUE reproduces the stored folds only if they were
  # flattened row-wise; confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T16_6...")

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Fit inside tryCatch() and store the results afterwards: assignments made inside
  # an error handler only change the handler's local copies, never the result lists.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T16_6_gblup_variances_all[[r]] <- gblup_fit$variances
    T16_6_gblup_prediction_all[[r]] <- gblup_fit$prediction
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # One placeholder per GO term, named after the term.
  gf_variances <- rep(list(list()), length(GF_filtered))
  names(gf_variances) <- names(GF_filtered)
  gf_prediction <- gf_variances
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the GO-term one and its counterpart in rGF.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    gf_fit <- tryCatch(
      list(
        variances = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        prediction = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    # A failed term keeps its empty-list placeholder in BOTH result lists.
    if (!is.null(gf_fit)) {
      gf_variances[[k]] <- gf_fit$variances
      gf_prediction[[k]] <- gf_fit$prediction
    }
  }
  T16_6_gfblup_variances_all[[r]] <- gf_variances
  T16_6_gfblup_prediction_all[[r]] <- gf_prediction
  T16_6_gfblup_validate_all[[r]] <- list(validate)
}

# Results are written relative to the working directory set at the top of the script.
saveRDS(T16_6_gblup_variances_all, "T16_6_gblup_variances_all_5001_6000.rds")
saveRDS(T16_6_gblup_prediction_all, "T16_6_gblup_prediction_all_5001_6000.rds")
saveRDS(T16_6_gfblup_variances_all, "T16_6_gfblup_variances_all_5001_6000.rds")
saveRDS(T16_6_gfblup_prediction_all, "T16_6_gfblup_prediction_all_5001_6000.rds")
saveRDS(T16_6_gfblup_validate_all, "T16_6_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################
# Patch header of the next script, kept verbatim:
# diff --git a/code/using_GO/pla/T16_6_6001_7297.R b/code/using_GO/pla/T16_6_6001_7297.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..803624b3978025cd738c4aa4a7b2033b15eb8074
# --- /dev/null
# +++ b/code/using_GO/pla/T16_6_6001_7297.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-off dependency installation, kept for reference:
# if (!is.installed("backports")) {
#   install.packages("backports")
# }
# if (!is.installed("devtools")) {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[6001:7297] +rGF_filtered<-rGF_filtered[6001:7297] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + 
# Validation sets are read from the GBLUP run's output instead of being
# re-sampled here (presumably so both models share the same folds -- confirm).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
T16_6_gblup_variances_all <- rep(list(list()), cycles)
T16_6_gblup_prediction_all <- rep(list(list()), cycles)
T16_6_gfblup_variances_all <- rep(list(list()), cycles)
T16_6_gfblup_prediction_all <- rep(list(list()), cycles)
T16_6_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and design matrix do not depend on the cycle, so build them once.
# NOTE(review): the formula names the response on its right-hand side too; an
# intercept-only design (y ~ 1) is presumably intended -- confirm.
y <- pheno_df_pla$T16_6
fm <- y ~ 1 * y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The clock-seeded fold sampling that used to sit here was never read (the
  # loop that filled `validate` from it was commented out), so it is removed.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T16_6...")

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed for either to be stored. The previous handler
  # assigned inside its own function scope (a no-op) and hid the error; the
  # failure is now reported and the pre-filled empty slots are left as they are.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T16_6_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T16_6_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Per GO term: fit with the k-th entries of GF_filtered and rGF_filtered as
  # the two relationship matrices. Entries stay list() when a fit fails.
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
      pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  T16_6_gfblup_variances_all[[r]] <- var
  T16_6_gfblup_prediction_all[[r]] <- pred
  T16_6_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T16_6_gblup_variances_all, "T16_6_gblup_variances_all_6001_7297.rds")
saveRDS(T16_6_gblup_prediction_all, "T16_6_gblup_prediction_all_6001_7297.rds")
saveRDS(T16_6_gfblup_variances_all, "T16_6_gfblup_variances_all_6001_7297.rds")
saveRDS(T16_6_gfblup_prediction_all, "T16_6_gfblup_prediction_all_6001_7297.rds")
saveRDS(T16_6_gfblup_validate_all, "T16_6_gfblup_validate_all_6001_7297.rds")

# The line below is the untouched remainder of the original physical line
# (patch metadata and the opening of the next script), kept verbatim.
################################################################################################################################## diff --git a/code/using_GO/pla/T16_7_1001_2000.R b/code/using_GO/pla/T16_7_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..a39cc0cce8145c6c7242e32520a3b00ffcfb104f --- /dev/null +++ b/code/using_GO/pla/T16_7_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# 
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
#7297 excluding those go terms with zero markers
# NOTE(review): nsets and nsnps are not referenced again in this script.
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
# Keep only GO terms with at least one marker, then take this job's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1001:2000]
rGF_filtered <- rGF_filtered[1001:2000]

###########################################################################################
cycles <- 10
# Validation sets are read from the GBLUP run's output instead of being
# re-sampled here (presumably so both models share the same folds -- confirm).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
T16_7_gblup_variances_all <- rep(list(list()), cycles)
T16_7_gblup_prediction_all <- rep(list(list()), cycles)
T16_7_gfblup_variances_all <- rep(list(list()), cycles)
T16_7_gfblup_prediction_all <- rep(list(list()), cycles)
T16_7_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and design matrix do not depend on the cycle, so build them once.
# NOTE(review): the formula names the response on its right-hand side too; an
# intercept-only design (y ~ 1) is presumably intended -- confirm.
y <- pheno_df_pla$T16_7
fm <- y ~ 1 * y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The clock-seeded fold sampling that used to sit here was never read (the
  # loop that filled `validate` from it was commented out), so it is removed.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T16_7...")

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed for either to be stored. The previous handler
  # assigned inside its own function scope (a no-op) and hid the error; the
  # failure is now reported and the pre-filled empty slots are left as they are.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T16_7_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T16_7_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Per GO term: fit with the k-th entries of GF_filtered and rGF_filtered as
  # the two relationship matrices. Entries stay list() when a fit fails.
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
      pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  T16_7_gfblup_variances_all[[r]] <- var
  T16_7_gfblup_prediction_all[[r]] <- pred
  T16_7_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T16_7_gblup_variances_all, "T16_7_gblup_variances_all_1001_2000.rds")
saveRDS(T16_7_gblup_prediction_all, "T16_7_gblup_prediction_all_1001_2000.rds")
saveRDS(T16_7_gfblup_variances_all, "T16_7_gfblup_variances_all_1001_2000.rds")
saveRDS(T16_7_gfblup_prediction_all, "T16_7_gfblup_prediction_all_1001_2000.rds")
+saveRDS(T16_7_gfblup_validate_all,"T16_7_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T16_7_1_1000.R b/code/using_GO/pla/T16_7_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..a39d072c948a11a669d097267081c4003f51d864 --- /dev/null +++ b/code/using_GO/pla/T16_7_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") 
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) 
# Keep only GO terms with at least one marker, then take this job's slice.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]

###########################################################################################
cycles <- 10
# Validation sets are read from the GBLUP run's output instead of being
# re-sampled here (presumably so both models share the same folds -- confirm).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
T16_7_gblup_variances_all <- rep(list(list()), cycles)
T16_7_gblup_prediction_all <- rep(list(list()), cycles)
T16_7_gfblup_variances_all <- rep(list(list()), cycles)
T16_7_gfblup_prediction_all <- rep(list(list()), cycles)
T16_7_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and design matrix do not depend on the cycle, so build them once.
# NOTE(review): the formula names the response on its right-hand side too; an
# intercept-only design (y ~ 1) is presumably intended -- confirm.
y <- pheno_df_pla$T16_7
fm <- y ~ 1 * y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The clock-seeded fold sampling that used to sit here was never read (the
  # loop that filled `validate` from it was commented out), so it is removed.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T16_7...")

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed for either to be stored. The previous handler
  # assigned inside its own function scope (a no-op) and hid the error; the
  # failure is now reported and the pre-filled empty slots are left as they are.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T16_7_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T16_7_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Per GO term: fit with the k-th entries of GF_filtered and rGF_filtered as
  # the two relationship matrices. Entries stay list() when a fit fails.
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
      pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  T16_7_gfblup_variances_all[[r]] <- var
  T16_7_gfblup_prediction_all[[r]] <- pred
  T16_7_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T16_7_gblup_variances_all, "T16_7_gblup_variances_all_1_1000.rds")
saveRDS(T16_7_gblup_prediction_all, "T16_7_gblup_prediction_all_1_1000.rds")
saveRDS(T16_7_gfblup_variances_all, "T16_7_gfblup_variances_all_1_1000.rds")
saveRDS(T16_7_gfblup_prediction_all, "T16_7_gfblup_prediction_all_1_1000.rds")
saveRDS(T16_7_gfblup_validate_all, "T16_7_gfblup_validate_all_1_1000.rds")

# The line below is the untouched remainder of the original physical line
# (patch metadata and the opening of the next script), kept verbatim.
################################################################################################################################## diff --git a/code/using_GO/pla/T16_7_2001_3000.R b/code/using_GO/pla/T16_7_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..bab3ffc71816f5b8caeb14d6b180b3ed9d9c9895 --- /dev/null +++ b/code/using_GO/pla/T16_7_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, 
installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T16_7_gblup_variances_all=rep(list(list()),cycles) +T16_7_gblup_prediction_all=rep(list(list()),cycles) +T16_7_gfblup_variances_all=rep(list(list()),cycles) 
# Remaining per-cycle result lists (the first three are created just above);
# a slot stays an empty list when the fit fails.
T16_7_gfblup_prediction_all <- rep(list(list()), cycles)
T16_7_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and design matrix do not depend on the cycle, so build them once.
# NOTE(review): the formula names the response on its right-hand side too; an
# intercept-only design (y ~ 1) is presumably intended -- confirm.
y <- pheno_df_pla$T16_7
fm <- y ~ 1 * y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The clock-seeded fold sampling that used to sit here was never read (the
  # loop that filled `validate` from it was commented out), so it is removed.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T16_7...")

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed for either to be stored. The previous handler
  # assigned inside its own function scope (a no-op) and hid the error; the
  # failure is now reported and the pre-filled empty slots are left as they are.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T16_7_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T16_7_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Per GO term: fit with the k-th entries of GF_filtered and rGF_filtered as
  # the two relationship matrices. Entries stay list() when a fit fails.
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
      pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  T16_7_gfblup_variances_all[[r]] <- var
  T16_7_gfblup_prediction_all[[r]] <- pred
  T16_7_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T16_7_gblup_variances_all, "T16_7_gblup_variances_all_2001_3000.rds")
saveRDS(T16_7_gblup_prediction_all, "T16_7_gblup_prediction_all_2001_3000.rds")
saveRDS(T16_7_gfblup_variances_all, "T16_7_gfblup_variances_all_2001_3000.rds")
saveRDS(T16_7_gfblup_prediction_all, "T16_7_gfblup_prediction_all_2001_3000.rds")
saveRDS(T16_7_gfblup_validate_all, "T16_7_gfblup_validate_all_2001_3000.rds")

# The line below is the untouched remainder of the original physical line
# (patch metadata and the opening of the next script), kept verbatim.
################################################################################################################################## diff --git a/code/using_GO/pla/T16_7_3001_4000.R b/code/using_GO/pla/T16_7_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..57956bd8db6de36f24c3df2c0fbe69bcbde4e231 --- /dev/null +++ b/code/using_GO/pla/T16_7_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") 
# Input locations, hoisted so each directory literal is written once.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

# NOTE(review): several objects read here (e.g. MAC_matrix_with_header,
# accessions, W, MAC_df, markerSets, setsGF, rsetsGF, nsets, nsnps) are not
# referenced again in this script; consider dropping those reads.
MAC_matrix_with_header <- readRDS(file = file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file = file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file = file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file = file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file = file.path(data_dir, "W.rds"))
# load() restores objects under their saved names; `G` and `pheno_df_pla` used
# below presumably come from these files -- confirm.
load(file = file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file = file.path(data_dir, "MAC_df.rds"))
load(file = file.path(data_dir, "Pheno_pla.Rdata"))
load(file = file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file = file.path(data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file = file.path(go_dir, "go_all_genes_number.rds")) #7432
go_all_markers_number <- readRDS(file = file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file = file.path(go_dir, "go_all_markers.rds")) #7432
# The objects below: 7297 entries, excluding those go terms with zero markers.
markerSets <- readRDS(file = file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file = file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file = file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file = file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file = file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file = file.path(go_dir, "GF.rds"))
rGF <- readRDS(file = file.path(go_dir, "rGF.rds"))
########################################################################################
# Keep only GO terms with at least one marker, then take this job's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]

###########################################################################################
cycles <- 10
# Validation sets are read from the GBLUP run's output instead of being
# re-sampled here (presumably so both models share the same folds -- confirm).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
T16_7_gblup_variances_all <- rep(list(list()), cycles)
T16_7_gblup_prediction_all <- rep(list(list()), cycles)
T16_7_gfblup_variances_all <- rep(list(list()), cycles)
T16_7_gfblup_prediction_all <- rep(list(list()), cycles)
T16_7_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and design matrix do not depend on the cycle, so build them once.
# NOTE(review): the formula names the response on its right-hand side too; an
# intercept-only design (y ~ 1) is presumably intended -- confirm.
y <- pheno_df_pla$T16_7
fm <- y ~ 1 * y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The clock-seeded fold sampling that used to sit here was never read (the
  # loop that filled `validate` from it was commented out), so it is removed.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T16_7...")

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed for either to be stored. The previous handler
  # assigned inside its own function scope (a no-op) and hid the error; the
  # failure is now reported and the pre-filled empty slots are left as they are.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T16_7_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T16_7_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Per GO term: fit with the k-th entries of GF_filtered and rGF_filtered as
  # the two relationship matrices. Entries stay list() when a fit fails.
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
      pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  T16_7_gfblup_variances_all[[r]] <- var
  T16_7_gfblup_prediction_all[[r]] <- pred
  T16_7_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T16_7_gblup_variances_all, "T16_7_gblup_variances_all_3001_4000.rds")
saveRDS(T16_7_gblup_prediction_all, "T16_7_gblup_prediction_all_3001_4000.rds")
saveRDS(T16_7_gfblup_variances_all, "T16_7_gfblup_variances_all_3001_4000.rds")
saveRDS(T16_7_gfblup_prediction_all, "T16_7_gfblup_prediction_all_3001_4000.rds")
saveRDS(T16_7_gfblup_validate_all, "T16_7_gfblup_validate_all_3001_4000.rds")

# The line below is the untouched remainder of the original physical line
# (patch metadata for the next script), kept verbatim.
################################################################################################################################## diff --git a/code/using_GO/pla/T16_7_4001_5000.R b/code/using_GO/pla/T16_7_4001_5000.R new 
file mode 100644 index 0000000000000000000000000000000000000000..43161859ac830b7448dd1cb55ed17eafe3bd4ad3 --- /dev/null +++ b/code/using_GO/pla/T16_7_4001_5000.R @@ -0,0 +1,131 @@
# Cross-validated GBLUP and GFBLUP fits (greml) for trait T16_7, restricted to
# GO terms 4001-5000 of the filtered GO list. Results are written as .rds
# files into the working directory set below.
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
# Load order is kept exactly as in the original script, because the .Rdata
# files may define objects whose names are not visible here.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
# The objects below: 7297, excluding those go terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
########################################################################################
# filter: keep the GO terms that have at least one marker, then take the
# slice of terms handled by this job
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
go_slice <- 4001:5000
GF_filtered <- GF[names(go_all_markers_filtered)][go_slice]
rGF_filtered <- rGF[names(go_all_markers_filtered)][go_slice]

###########################################################################################
cycles <- 10
n_folds <- 8
# Stored validation sets, indexed by cycle in the loop below.
gblup_validate <- readRDS(file.path(
  "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold",
  "gblup_validate_all.rds"
))

T16_7_gblup_variances_all <- rep(list(list()), cycles)
T16_7_gblup_prediction_all <- rep(list(list()), cycles)
T16_7_gfblup_variances_all <- rep(list(list()), cycles)
T16_7_gfblup_prediction_all <- rep(list(list()), cycles)
T16_7_gfblup_validate_all <- rep(list(list()), cycles)

# Report a failed fit instead of dropping the error silently. The
# pre-allocated empty list() stays in place for the failed entry.
warn_failed <- function(what, e) {
  warning(what, " failed: ", conditionMessage(e),
          call. = FALSE, immediate. = TRUE)
  NULL
}

# Phenotype and design matrix do not change between cycles.
y <- pheno_df_pla$T16_7
# Intercept-only design. The original formula `y ~ 1*y` had the response on
# the right-hand side, which model.matrix() drops with a warning; `y ~ 1`
# states the same design explicitly.
X <- model.matrix(y ~ 1)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are rebuilt from the stored sets; nothing is drawn at random here.
  # NOTE(review): unlist() flattens a matrix column by column, so
  # byrow = TRUE reproduces the stored folds only if they were saved
  # row-wise - confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     nrow = round(nrow(pheno_df_pla) / n_folds),
                     byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T16_7...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed before either result is stored.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G),
                   validate = validate, ncores = 1)
    ),
    error = function(e) warn_failed(paste0("GBLUP, cycle ", r), e)
  )
  if (!is.null(gblup_fit)) {
    T16_7_gblup_variances_all[[r]] <- gblup_fit$var
    T16_7_gblup_prediction_all[[r]] <- gblup_fit$pred
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # GRM pair for this GO term: the term's matrix plus its counterpart in rGF.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair,
                            validate = validate, ncores = 1)
    }, error = function(e) {
      warn_failed(paste0("GFBLUP, cycle ", r, ", GO#", k), e)
    })
  }
  T16_7_gfblup_variances_all[[r]] <- gf_var
  T16_7_gfblup_prediction_all[[r]] <- gf_pred
  T16_7_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T16_7_gblup_variances_all, "T16_7_gblup_variances_all_4001_5000.rds")
saveRDS(T16_7_gblup_prediction_all, "T16_7_gblup_prediction_all_4001_5000.rds")
saveRDS(T16_7_gfblup_variances_all, "T16_7_gfblup_variances_all_4001_5000.rds")
saveRDS(T16_7_gfblup_prediction_all, "T16_7_gfblup_prediction_all_4001_5000.rds")
saveRDS(T16_7_gfblup_validate_all, "T16_7_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################
diff --git a/code/using_GO/pla/T16_7_5001_6000.R b/code/using_GO/pla/T16_7_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..97b443982bd9856e1c330339ac767e44457fa6f8 --- /dev/null +++ b/code/using_GO/pla/T16_7_5001_6000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#}
# Cross-validated GBLUP and GFBLUP fits (greml) for trait T16_7, restricted to
# GO terms 5001-6000 of the filtered GO list. Results are written as .rds
# files into the current working directory.
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
# Load order is kept exactly as in the original script, because the .Rdata
# files may define objects whose names are not visible here.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
# The objects below: 7297, excluding those go terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
########################################################################################
# filter: keep the GO terms that have at least one marker, then take the
# slice of terms handled by this job
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
go_slice <- 5001:6000
GF_filtered <- GF[names(go_all_markers_filtered)][go_slice]
rGF_filtered <- rGF[names(go_all_markers_filtered)][go_slice]

###########################################################################################
cycles <- 10
n_folds <- 8
# Stored validation sets, indexed by cycle in the loop below.
gblup_validate <- readRDS(file.path(
  "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold",
  "gblup_validate_all.rds"
))

T16_7_gblup_variances_all <- rep(list(list()), cycles)
T16_7_gblup_prediction_all <- rep(list(list()), cycles)
T16_7_gfblup_variances_all <- rep(list(list()), cycles)
T16_7_gfblup_prediction_all <- rep(list(list()), cycles)
T16_7_gfblup_validate_all <- rep(list(list()), cycles)

# Report a failed fit instead of dropping the error silently. The
# pre-allocated empty list() stays in place for the failed entry.
warn_failed <- function(what, e) {
  warning(what, " failed: ", conditionMessage(e),
          call. = FALSE, immediate. = TRUE)
  NULL
}

# Phenotype and design matrix do not change between cycles.
y <- pheno_df_pla$T16_7
# Intercept-only design. The original formula `y ~ 1*y` had the response on
# the right-hand side, which model.matrix() drops with a warning; `y ~ 1`
# states the same design explicitly.
X <- model.matrix(y ~ 1)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are rebuilt from the stored sets; nothing is drawn at random here.
  # NOTE(review): unlist() flattens a matrix column by column, so
  # byrow = TRUE reproduces the stored folds only if they were saved
  # row-wise - confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     nrow = round(nrow(pheno_df_pla) / n_folds),
                     byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T16_7...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed before either result is stored.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G),
                   validate = validate, ncores = 1)
    ),
    error = function(e) warn_failed(paste0("GBLUP, cycle ", r), e)
  )
  if (!is.null(gblup_fit)) {
    T16_7_gblup_variances_all[[r]] <- gblup_fit$var
    T16_7_gblup_prediction_all[[r]] <- gblup_fit$pred
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # GRM pair for this GO term: the term's matrix plus its counterpart in rGF.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair,
                            validate = validate, ncores = 1)
    }, error = function(e) {
      warn_failed(paste0("GFBLUP, cycle ", r, ", GO#", k), e)
    })
  }
  T16_7_gfblup_variances_all[[r]] <- gf_var
  T16_7_gfblup_prediction_all[[r]] <- gf_pred
  T16_7_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T16_7_gblup_variances_all, "T16_7_gblup_variances_all_5001_6000.rds")
saveRDS(T16_7_gblup_prediction_all, "T16_7_gblup_prediction_all_5001_6000.rds")
saveRDS(T16_7_gfblup_variances_all, "T16_7_gfblup_variances_all_5001_6000.rds")
saveRDS(T16_7_gfblup_prediction_all, "T16_7_gfblup_prediction_all_5001_6000.rds")
saveRDS(T16_7_gfblup_validate_all, "T16_7_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################
diff --git a/code/using_GO/pla/T16_7_6001_7297.R b/code/using_GO/pla/T16_7_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..db9166b4f85f59845adf994a26ada0201a5bb93a --- /dev/null +++ b/code/using_GO/pla/T16_7_6001_7297.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
# Remaining inputs and the cross-validated GBLUP / GFBLUP fits (greml) for
# trait T16_7, restricted to GO terms 6001-7297 of the filtered GO list.
# Load order is kept exactly as in the original script, because the .Rdata
# files may define objects whose names are not visible here.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
# The objects below: 7297, excluding those go terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
########################################################################################
# filter: keep the GO terms that have at least one marker, then take the
# slice of terms handled by this job
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
go_slice <- 6001:7297
GF_filtered <- GF[names(go_all_markers_filtered)][go_slice]
rGF_filtered <- rGF[names(go_all_markers_filtered)][go_slice]

###########################################################################################
cycles <- 10
n_folds <- 8
# Stored validation sets, indexed by cycle in the loop below.
gblup_validate <- readRDS(file.path(
  "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold",
  "gblup_validate_all.rds"
))

T16_7_gblup_variances_all <- rep(list(list()), cycles)
T16_7_gblup_prediction_all <- rep(list(list()), cycles)
T16_7_gfblup_variances_all <- rep(list(list()), cycles)
T16_7_gfblup_prediction_all <- rep(list(list()), cycles)
T16_7_gfblup_validate_all <- rep(list(list()), cycles)

# Report a failed fit instead of dropping the error silently. The
# pre-allocated empty list() stays in place for the failed entry.
warn_failed <- function(what, e) {
  warning(what, " failed: ", conditionMessage(e),
          call. = FALSE, immediate. = TRUE)
  NULL
}

# Phenotype and design matrix do not change between cycles.
y <- pheno_df_pla$T16_7
# Intercept-only design. The original formula `y ~ 1*y` had the response on
# the right-hand side, which model.matrix() drops with a warning; `y ~ 1`
# states the same design explicitly.
X <- model.matrix(y ~ 1)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are rebuilt from the stored sets; nothing is drawn at random here.
  # NOTE(review): unlist() flattens a matrix column by column, so
  # byrow = TRUE reproduces the stored folds only if they were saved
  # row-wise - confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     nrow = round(nrow(pheno_df_pla) / n_folds),
                     byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T16_7...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed before either result is stored.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G),
                   validate = validate, ncores = 1)
    ),
    error = function(e) warn_failed(paste0("GBLUP, cycle ", r), e)
  )
  if (!is.null(gblup_fit)) {
    T16_7_gblup_variances_all[[r]] <- gblup_fit$var
    T16_7_gblup_prediction_all[[r]] <- gblup_fit$pred
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # GRM pair for this GO term: the term's matrix plus its counterpart in rGF.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair,
                            validate = validate, ncores = 1)
    }, error = function(e) {
      warn_failed(paste0("GFBLUP, cycle ", r, ", GO#", k), e)
    })
  }
  T16_7_gfblup_variances_all[[r]] <- gf_var
  T16_7_gfblup_prediction_all[[r]] <- gf_pred
  T16_7_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T16_7_gblup_variances_all, "T16_7_gblup_variances_all_6001_7297.rds")
saveRDS(T16_7_gblup_prediction_all, "T16_7_gblup_prediction_all_6001_7297.rds")
saveRDS(T16_7_gfblup_variances_all, "T16_7_gfblup_variances_all_6001_7297.rds")
saveRDS(T16_7_gfblup_prediction_all, "T16_7_gfblup_prediction_all_6001_7297.rds")
saveRDS(T16_7_gfblup_validate_all, "T16_7_gfblup_validate_all_6001_7297.rds")

##################################################################################################################################
diff --git a/code/using_GO/pla/T16_8_1001_2000.R b/code/using_GO/pla/T16_8_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..79d4a6a3628bd3e5a81ca67aaddefd84e31ed506 --- /dev/null +++ b/code/using_GO/pla/T16_8_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <-
function (x) gsub("^\\s+|\\s+$", "", x)
# Cross-validated GBLUP and GFBLUP fits (greml) for trait T16_8, restricted to
# GO terms 1001-2000 of the filtered GO list. Results are written as .rds
# files into the working directory set below.
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
# Load order is kept exactly as in the original script, because the .Rdata
# files may define objects whose names are not visible here.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
# The objects below: 7297, excluding those go terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
########################################################################################
# filter: keep the GO terms that have at least one marker, then take the
# slice of terms handled by this job
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
go_slice <- 1001:2000
GF_filtered <- GF[names(go_all_markers_filtered)][go_slice]
rGF_filtered <- rGF[names(go_all_markers_filtered)][go_slice]

###########################################################################################
cycles <- 10
n_folds <- 8
# Stored validation sets, indexed by cycle in the loop below.
gblup_validate <- readRDS(file.path(
  "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold",
  "gblup_validate_all.rds"
))

T16_8_gblup_variances_all <- rep(list(list()), cycles)
T16_8_gblup_prediction_all <- rep(list(list()), cycles)
T16_8_gfblup_variances_all <- rep(list(list()), cycles)
T16_8_gfblup_prediction_all <- rep(list(list()), cycles)
T16_8_gfblup_validate_all <- rep(list(list()), cycles)

# Report a failed fit instead of dropping the error silently. The
# pre-allocated empty list() stays in place for the failed entry.
warn_failed <- function(what, e) {
  warning(what, " failed: ", conditionMessage(e),
          call. = FALSE, immediate. = TRUE)
  NULL
}

# Phenotype and design matrix do not change between cycles.
y <- pheno_df_pla$T16_8
# Intercept-only design. The original formula `y ~ 1*y` had the response on
# the right-hand side, which model.matrix() drops with a warning; `y ~ 1`
# states the same design explicitly.
X <- model.matrix(y ~ 1)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are rebuilt from the stored sets; nothing is drawn at random here.
  # NOTE(review): unlist() flattens a matrix column by column, so
  # byrow = TRUE reproduces the stored folds only if they were saved
  # row-wise - confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     nrow = round(nrow(pheno_df_pla) / n_folds),
                     byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T16_8...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed before either result is stored.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G),
                   validate = validate, ncores = 1)
    ),
    error = function(e) warn_failed(paste0("GBLUP, cycle ", r), e)
  )
  if (!is.null(gblup_fit)) {
    T16_8_gblup_variances_all[[r]] <- gblup_fit$var
    T16_8_gblup_prediction_all[[r]] <- gblup_fit$pred
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # GRM pair for this GO term: the term's matrix plus its counterpart in rGF.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair,
                            validate = validate, ncores = 1)
    }, error = function(e) {
      warn_failed(paste0("GFBLUP, cycle ", r, ", GO#", k), e)
    })
  }
  T16_8_gfblup_variances_all[[r]] <- gf_var
  T16_8_gfblup_prediction_all[[r]] <- gf_pred
  T16_8_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T16_8_gblup_variances_all, "T16_8_gblup_variances_all_1001_2000.rds")
saveRDS(T16_8_gblup_prediction_all, "T16_8_gblup_prediction_all_1001_2000.rds")
saveRDS(T16_8_gfblup_variances_all, "T16_8_gfblup_variances_all_1001_2000.rds")
saveRDS(T16_8_gfblup_prediction_all, "T16_8_gfblup_prediction_all_1001_2000.rds")
saveRDS(T16_8_gfblup_validate_all, "T16_8_gfblup_validate_all_1001_2000.rds")

##################################################################################################################################
diff --git a/code/using_GO/pla/T16_8_1_1000.R b/code/using_GO/pla/T16_8_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..781b0fa0812c753efdc9412d8d2b4243186035b4 --- /dev/null +++ b/code/using_GO/pla/T16_8_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+# Purpose: repeated GBLUP and per-GO-term GFBLUP fits (qgg::greml) for trait T16_8.
+library('qgg')
+library('dplyr')
+# ---- Load precomputed inputs (absolute paths under priors/data and priors/go) ----
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+# ---- Keep GO terms with at least one marker, then take this script's slice ----
+# The slice bounds match the suffix of the .rds files written at the end.
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[1:1000]
+rGF_filtered<-rGF_filtered[1:1000]
+
+# ---- Cross-validation setup: folds come from gblup_validate_all.rds ----
+cycles=10 # gblup_validate[[r]] is indexed for r = 1..cycles below
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T16_8_gblup_variances_all=rep(list(list()),cycles)
+T16_8_gblup_prediction_all=rep(list(list()),cycles)
+T16_8_gfblup_variances_all=rep(list(list()),cycles)
+T16_8_gfblup_prediction_all=rep(list(list()),cycles)
+T16_8_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  a=as.numeric(Sys.time())
+  set.seed(a) # time-based seed: draws differ from run to run
+  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla)))
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  # validate holds the folds stored for cycle r in gblup_validate, reshaped to
+  # round(nrow(pheno_df_pla)/n_folds) rows; folds_index above is not used below.
+  # The seeding and sampling are kept so the RNG stream of the script is unchanged.
+  # NOTE(review): confirm the byrow=TRUE reshape matches how the folds were saved.
+  # ---- Trait T16_8: response vector and design matrix ----
+  message ("T16_8...")
+  y=pheno_df_pla$T16_8
+  fm <- y ~ 1*y # NOTE(review): the response is also on the right-hand side; confirm X is as intended
+  X <- model.matrix(fm)
+  # y and X are passed unchanged to every greml() call in this cycle.
+  # ---- GBLUP: one fit without validate, one with the validation folds ----
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G), ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list()) # fallback value of tryCatch(); stored below
+  })
+  T16_8_gblup_variances_all[[r]] <- gblup_fit[["var"]]   # assigned outside the handler: an
+  T16_8_gblup_prediction_all[[r]] <- gblup_fit[["pred"]] # assignment inside it would be local
+  # ---- GFBLUP: the same two fits per GO term, with that term's pair of GRMs ----
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k]) # the two GRM entries for GO term k
+    fit_k <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_k, ncores=1),
+           pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO#", k, " in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list()) # if either fit fails, both entries stay empty
+    })
+    var[[k]] <- fit_k[["var"]]
+    pred[[k]] <- fit_k[["pred"]]
+  }
+  T16_8_gfblup_variances_all[[r]]<-var
+  T16_8_gfblup_prediction_all[[r]]<-pred
+  T16_8_gfblup_validate_all[[r]]<-list(validate)
+  # ---- end of cycle r ----
+}
+saveRDS(T16_8_gblup_variances_all,"T16_8_gblup_variances_all_1_1000.rds")
+saveRDS(T16_8_gblup_prediction_all,"T16_8_gblup_prediction_all_1_1000.rds")
+saveRDS(T16_8_gfblup_variances_all,"T16_8_gfblup_variances_all_1_1000.rds")
+saveRDS(T16_8_gfblup_prediction_all,"T16_8_gfblup_prediction_all_1_1000.rds")
+saveRDS(T16_8_gfblup_validate_all,"T16_8_gfblup_validate_all_1_1000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T16_8_2001_3000.R b/code/using_GO/pla/T16_8_2001_3000.R
new file mode 100644
index 0000000000000000000000000000000000000000..f1a1c6cac18f9d6c6fa2b55c8c76883c66c182a1
--- /dev/null
+++ b/code/using_GO/pla/T16_8_2001_3000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+# Purpose: repeated GBLUP and per-GO-term GFBLUP fits (qgg::greml) for trait T16_8.
+library('qgg')
+library('dplyr')
+# ---- Load precomputed inputs (absolute paths under priors/data and priors/go) ----
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+# ---- Keep GO terms with at least one marker, then take this script's slice ----
+# The slice bounds match the suffix of the .rds files written at the end.
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[2001:3000]
+rGF_filtered<-rGF_filtered[2001:3000]
+
+# ---- Cross-validation setup: folds come from gblup_validate_all.rds ----
+cycles=10 # gblup_validate[[r]] is indexed for r = 1..cycles below
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T16_8_gblup_variances_all=rep(list(list()),cycles)
+T16_8_gblup_prediction_all=rep(list(list()),cycles)
+T16_8_gfblup_variances_all=rep(list(list()),cycles)
+T16_8_gfblup_prediction_all=rep(list(list()),cycles)
+T16_8_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  a=as.numeric(Sys.time())
+  set.seed(a) # time-based seed: draws differ from run to run
+  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla)))
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  # validate holds the folds stored for cycle r in gblup_validate, reshaped to
+  # round(nrow(pheno_df_pla)/n_folds) rows; folds_index above is not used below.
+  # The seeding and sampling are kept so the RNG stream of the script is unchanged.
+  # NOTE(review): confirm the byrow=TRUE reshape matches how the folds were saved.
+  # ---- Trait T16_8: response vector and design matrix ----
+  message ("T16_8...")
+  y=pheno_df_pla$T16_8
+  fm <- y ~ 1*y # NOTE(review): the response is also on the right-hand side; confirm X is as intended
+  X <- model.matrix(fm)
+  # y and X are passed unchanged to every greml() call in this cycle.
+  # ---- GBLUP: one fit without validate, one with the validation folds ----
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G), ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list()) # fallback value of tryCatch(); stored below
+  })
+  T16_8_gblup_variances_all[[r]] <- gblup_fit[["var"]]   # assigned outside the handler: an
+  T16_8_gblup_prediction_all[[r]] <- gblup_fit[["pred"]] # assignment inside it would be local
+  # ---- GFBLUP: the same two fits per GO term, with that term's pair of GRMs ----
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k]) # the two GRM entries for GO term k
+    fit_k <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_k, ncores=1),
+           pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO#", k, " in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list()) # if either fit fails, both entries stay empty
+    })
+    var[[k]] <- fit_k[["var"]]
+    pred[[k]] <- fit_k[["pred"]]
+  }
+  T16_8_gfblup_variances_all[[r]]<-var
+  T16_8_gfblup_prediction_all[[r]]<-pred
+  T16_8_gfblup_validate_all[[r]]<-list(validate)
+  # ---- end of cycle r ----
+}
+saveRDS(T16_8_gblup_variances_all,"T16_8_gblup_variances_all_2001_3000.rds")
+saveRDS(T16_8_gblup_prediction_all,"T16_8_gblup_prediction_all_2001_3000.rds")
+saveRDS(T16_8_gfblup_variances_all,"T16_8_gfblup_variances_all_2001_3000.rds")
+saveRDS(T16_8_gfblup_prediction_all,"T16_8_gfblup_prediction_all_2001_3000.rds")
+saveRDS(T16_8_gfblup_validate_all,"T16_8_gfblup_validate_all_2001_3000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T16_8_3001_4000.R b/code/using_GO/pla/T16_8_3001_4000.R
new file mode 100644
index 0000000000000000000000000000000000000000..1969932474e34a59a196fda10d2685e7a36b544a
--- /dev/null
+++ b/code/using_GO/pla/T16_8_3001_4000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+# Purpose: repeated GBLUP and per-GO-term GFBLUP fits (qgg::greml) for trait T16_8.
+library('qgg')
+library('dplyr')
+# ---- Load precomputed inputs (absolute paths under priors/data and priors/go) ----
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+# ---- Keep GO terms with at least one marker, then take this script's slice ----
+# The slice bounds match the suffix of the .rds files written at the end.
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[3001:4000]
+rGF_filtered<-rGF_filtered[3001:4000]
+
+# ---- Cross-validation setup: folds come from gblup_validate_all.rds ----
+cycles=10 # gblup_validate[[r]] is indexed for r = 1..cycles below
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T16_8_gblup_variances_all=rep(list(list()),cycles)
+T16_8_gblup_prediction_all=rep(list(list()),cycles)
+T16_8_gfblup_variances_all=rep(list(list()),cycles)
+T16_8_gfblup_prediction_all=rep(list(list()),cycles)
+T16_8_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  a=as.numeric(Sys.time())
+  set.seed(a) # time-based seed: draws differ from run to run
+  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla)))
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  # validate holds the folds stored for cycle r in gblup_validate, reshaped to
+  # round(nrow(pheno_df_pla)/n_folds) rows; folds_index above is not used below.
+  # The seeding and sampling are kept so the RNG stream of the script is unchanged.
+  # NOTE(review): confirm the byrow=TRUE reshape matches how the folds were saved.
+  # ---- Trait T16_8: response vector and design matrix ----
+  message ("T16_8...")
+  y=pheno_df_pla$T16_8
+  fm <- y ~ 1*y # NOTE(review): the response is also on the right-hand side; confirm X is as intended
+  X <- model.matrix(fm)
+  # y and X are passed unchanged to every greml() call in this cycle.
+  # ---- GBLUP: one fit without validate, one with the validation folds ----
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G), ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list()) # fallback value of tryCatch(); stored below
+  })
+  T16_8_gblup_variances_all[[r]] <- gblup_fit[["var"]]   # assigned outside the handler: an
+  T16_8_gblup_prediction_all[[r]] <- gblup_fit[["pred"]] # assignment inside it would be local
+  # ---- GFBLUP: the same two fits per GO term, with that term's pair of GRMs ----
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k]) # the two GRM entries for GO term k
+    fit_k <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_k, ncores=1),
+           pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO#", k, " in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list()) # if either fit fails, both entries stay empty
+    })
+    var[[k]] <- fit_k[["var"]]
+    pred[[k]] <- fit_k[["pred"]]
+  }
+  T16_8_gfblup_variances_all[[r]]<-var
+  T16_8_gfblup_prediction_all[[r]]<-pred
+  T16_8_gfblup_validate_all[[r]]<-list(validate)
+  # ---- end of cycle r ----
+}
+saveRDS(T16_8_gblup_variances_all,"T16_8_gblup_variances_all_3001_4000.rds")
+saveRDS(T16_8_gblup_prediction_all,"T16_8_gblup_prediction_all_3001_4000.rds")
+saveRDS(T16_8_gfblup_variances_all,"T16_8_gfblup_variances_all_3001_4000.rds")
+saveRDS(T16_8_gfblup_prediction_all,"T16_8_gfblup_prediction_all_3001_4000.rds")
+saveRDS(T16_8_gfblup_validate_all,"T16_8_gfblup_validate_all_3001_4000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T16_8_4001_5000.R b/code/using_GO/pla/T16_8_4001_5000.R
new file mode 100644
index 0000000000000000000000000000000000000000..13cd2b5d7e61203b9acfbc82c1dc0edd514d5191
--- /dev/null
+++ b/code/using_GO/pla/T16_8_4001_5000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+# Purpose: repeated GBLUP and per-GO-term GFBLUP fits (qgg::greml) for trait T16_8.
+library('qgg')
+library('dplyr')
+# ---- Load precomputed inputs (absolute paths under priors/data and priors/go) ----
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+# ---- Keep GO terms with at least one marker, then take this script's slice ----
+# The slice bounds match the suffix of the .rds files written at the end.
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[4001:5000]
+rGF_filtered<-rGF_filtered[4001:5000]
+
+# ---- Cross-validation setup: folds come from gblup_validate_all.rds ----
+cycles=10 # gblup_validate[[r]] is indexed for r = 1..cycles below
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T16_8_gblup_variances_all=rep(list(list()),cycles)
+T16_8_gblup_prediction_all=rep(list(list()),cycles)
+T16_8_gfblup_variances_all=rep(list(list()),cycles)
+T16_8_gfblup_prediction_all=rep(list(list()),cycles)
+T16_8_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  a=as.numeric(Sys.time())
+  set.seed(a) # time-based seed: draws differ from run to run
+  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla)))
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  # validate holds the folds stored for cycle r in gblup_validate, reshaped to
+  # round(nrow(pheno_df_pla)/n_folds) rows; folds_index above is not used below.
+  # The seeding and sampling are kept so the RNG stream of the script is unchanged.
+  # NOTE(review): confirm the byrow=TRUE reshape matches how the folds were saved.
+  # ---- Trait T16_8: response vector and design matrix ----
+  message ("T16_8...")
+  y=pheno_df_pla$T16_8
+  fm <- y ~ 1*y # NOTE(review): the response is also on the right-hand side; confirm X is as intended
+  X <- model.matrix(fm)
+  # y and X are passed unchanged to every greml() call in this cycle.
+  # ---- GBLUP: one fit without validate, one with the validation folds ----
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G), ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list()) # fallback value of tryCatch(); stored below
+  })
+  T16_8_gblup_variances_all[[r]] <- gblup_fit[["var"]]   # assigned outside the handler: an
+  T16_8_gblup_prediction_all[[r]] <- gblup_fit[["pred"]] # assignment inside it would be local
+  # ---- GFBLUP: the same two fits per GO term, with that term's pair of GRMs ----
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k]) # the two GRM entries for GO term k
+    fit_k <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_k, ncores=1),
+           pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO#", k, " in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list()) # if either fit fails, both entries stay empty
+    })
+    var[[k]] <- fit_k[["var"]]
+    pred[[k]] <- fit_k[["pred"]]
+  }
+  T16_8_gfblup_variances_all[[r]]<-var
+  T16_8_gfblup_prediction_all[[r]]<-pred
+  T16_8_gfblup_validate_all[[r]]<-list(validate)
+  # ---- end of cycle r ----
+}
+saveRDS(T16_8_gblup_variances_all,"T16_8_gblup_variances_all_4001_5000.rds")
+saveRDS(T16_8_gblup_prediction_all,"T16_8_gblup_prediction_all_4001_5000.rds")
+saveRDS(T16_8_gfblup_variances_all,"T16_8_gfblup_variances_all_4001_5000.rds")
+saveRDS(T16_8_gfblup_prediction_all,"T16_8_gfblup_prediction_all_4001_5000.rds")
+saveRDS(T16_8_gfblup_validate_all,"T16_8_gfblup_validate_all_4001_5000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T16_8_5001_6000.R b/code/using_GO/pla/T16_8_5001_6000.R
new file mode 100644
index 0000000000000000000000000000000000000000..02ed2cb1f197ca929dec41fc19deb91e86d0957a
--- /dev/null
+++ b/code/using_GO/pla/T16_8_5001_6000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+# Purpose: repeated GBLUP and per-GO-term GFBLUP fits (qgg::greml) for trait T16_8.
+library('qgg')
+library('dplyr')
+# ---- Load precomputed inputs (absolute paths under priors/data and priors/go) ----
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+# ---- Keep GO terms with at least one marker, then take this script's slice ----
+# The slice bounds match the suffix of the .rds files written at the end.
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[5001:6000]
+rGF_filtered<-rGF_filtered[5001:6000]
+
+# ---- Cross-validation setup: folds come from gblup_validate_all.rds ----
+cycles=10 # gblup_validate[[r]] is indexed for r = 1..cycles below
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T16_8_gblup_variances_all=rep(list(list()),cycles)
+T16_8_gblup_prediction_all=rep(list(list()),cycles)
+T16_8_gfblup_variances_all=rep(list(list()),cycles)
+T16_8_gfblup_prediction_all=rep(list(list()),cycles)
+T16_8_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  a=as.numeric(Sys.time())
+  set.seed(a) # time-based seed: draws differ from run to run
+  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla)))
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  # validate holds the folds stored for cycle r in gblup_validate, reshaped to
+  # round(nrow(pheno_df_pla)/n_folds) rows; folds_index above is not used below.
+  # The seeding and sampling are kept so the RNG stream of the script is unchanged.
+  # NOTE(review): confirm the byrow=TRUE reshape matches how the folds were saved.
+  # ---- Trait T16_8: response vector and design matrix ----
+  message ("T16_8...")
+  y=pheno_df_pla$T16_8
+  fm <- y ~ 1*y # NOTE(review): the response is also on the right-hand side; confirm X is as intended
+  X <- model.matrix(fm)
+  # y and X are passed unchanged to every greml() call in this cycle.
+  # ---- GBLUP: one fit without validate, one with the validation folds ----
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G), ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list()) # fallback value of tryCatch(); stored below
+  })
+  T16_8_gblup_variances_all[[r]] <- gblup_fit[["var"]]   # assigned outside the handler: an
+  T16_8_gblup_prediction_all[[r]] <- gblup_fit[["pred"]] # assignment inside it would be local
+  # ---- GFBLUP: the same two fits per GO term, with that term's pair of GRMs ----
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k]) # the two GRM entries for GO term k
+    fit_k <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_k, ncores=1),
+           pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO#", k, " in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list()) # if either fit fails, both entries stay empty
+    })
+    var[[k]] <- fit_k[["var"]]
+    pred[[k]] <- fit_k[["pred"]]
+  }
+  T16_8_gfblup_variances_all[[r]]<-var
+  T16_8_gfblup_prediction_all[[r]]<-pred
+  T16_8_gfblup_validate_all[[r]]<-list(validate)
+  # ---- end of cycle r ----
+}
+saveRDS(T16_8_gblup_variances_all,"T16_8_gblup_variances_all_5001_6000.rds")
+saveRDS(T16_8_gblup_prediction_all,"T16_8_gblup_prediction_all_5001_6000.rds")
+saveRDS(T16_8_gfblup_variances_all,"T16_8_gfblup_variances_all_5001_6000.rds")
+saveRDS(T16_8_gfblup_prediction_all,"T16_8_gfblup_prediction_all_5001_6000.rds")
+saveRDS(T16_8_gfblup_validate_all,"T16_8_gfblup_validate_all_5001_6000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T16_8_6001_7297.R b/code/using_GO/pla/T16_8_6001_7297.R
new file mode 100644
index 0000000000000000000000000000000000000000..af6422c90d47c62142c2fd84b323424b20f8d410
--- /dev/null
+++ b/code/using_GO/pla/T16_8_6001_7297.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+#
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
# 7297 excluding those GO terms with zero markers (note for the rsetsGF object
# read immediately before this point).
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those GO terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those GO terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those GO terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those GO terms with zero markers

# Filter ----
# Keep only GO terms that have at least one marker, then take the slice of
# terms (6001..7297) handled by this job.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[6001:7297]
rGF_filtered <- rGF_filtered[6001:7297]

# Cross-validation setup ----
cycles <- 10
# Fold assignments are read from the stored GBLUP run instead of being
# re-sampled here (presumably so GFBLUP is evaluated on the same folds as
# GBLUP -- confirm against the script that wrote this file).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T16_8_gblup_variances_all <- rep(list(list()), cycles)
T16_8_gblup_prediction_all <- rep(list(list()), cycles)
T16_8_gfblup_variances_all <- rep(list(list()), cycles)
T16_8_gfblup_prediction_all <- rep(list(list()), cycles)
T16_8_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))

  # Rebuild the validation matrix for this cycle from the stored folds:
  # round(n_individuals / n_folds) rows, filled row by row.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message ("T16_8...")
  y <- pheno_df_pla$T16_8
  # NOTE(review): `y ~ 1*y` puts the response on the right-hand side as well;
  # presumably an intercept-only design (`y ~ 1`) was intended -- confirm
  # before changing, the formula is kept exactly as it was.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)

  # GBLUP ----
  # The fits are returned from tryCatch() and stored afterwards. Assigning
  # inside the error handler would only modify variables local to the handler
  # function and never reach the result lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T16_8_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T16_8_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  # GFBLUP ----
  # One fit per GO term: the term's feature GRM plus its complementary GRM.
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)

  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    gfblup_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        # If either fit fails, both entries for this GO term are left empty.
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    gfblup_var[[k]] <- gfblup_fit[["var"]]
    gfblup_pred[[k]] <- gfblup_fit[["pred"]]
  }

  T16_8_gfblup_variances_all[[r]] <- gfblup_var
  T16_8_gfblup_prediction_all[[r]] <- gfblup_pred
  T16_8_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T16_8_gblup_variances_all, "T16_8_gblup_variances_all_6001_7297.rds")
saveRDS(T16_8_gblup_prediction_all, "T16_8_gblup_prediction_all_6001_7297.rds")
saveRDS(T16_8_gfblup_variances_all, "T16_8_gfblup_variances_all_6001_7297.rds")
saveRDS(T16_8_gfblup_prediction_all, "T16_8_gfblup_prediction_all_6001_7297.rds")
+saveRDS(T16_8_gfblup_validate_all,"T16_8_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T17_1_1001_2000.R b/code/using_GO/pla/T17_1_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..4d1f06a11b28dd869327e496793eaa50f0d7c328 --- /dev/null +++ b/code/using_GO/pla/T17_1_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# Marker map, genotype matrices and phenotype objects.
# load() restores whatever objects were saved in each .Rdata file.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO priors covering all GO terms (7432 each, per the original notes).
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# GO priors restricted to terms with at least one marker
# (7297 each, excluding those GO terms with zero markers).
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
+######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T17_1_gblup_variances_all=rep(list(list()),cycles) +T17_1_gblup_prediction_all=rep(list(list()),cycles) +T17_1_gfblup_variances_all=rep(list(list()),cycles) +T17_1_gfblup_prediction_all=rep(list(list()),cycles) +T17_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T17_1...") + y=pheno_df_pla$T17_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T17_1_gblup_variances_all[[r]]<-var + T17_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
T17_1_gblup_variances_all[[r]]<-list() + T17_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T17_1_gfblup_variances_all[[r]]<-var + T17_1_gfblup_prediction_all[[r]]<-pred + T17_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T17_1_gblup_variances_all,"T17_1_gblup_variances_all_1001_2000.rds") +saveRDS(T17_1_gblup_prediction_all,"T17_1_gblup_prediction_all_1001_2000.rds") +saveRDS(T17_1_gfblup_variances_all,"T17_1_gfblup_variances_all_1001_2000.rds") +saveRDS(T17_1_gfblup_prediction_all,"T17_1_gfblup_prediction_all_1001_2000.rds") +saveRDS(T17_1_gfblup_validate_all,"T17_1_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T17_1_1_1000.R b/code/using_GO/pla/T17_1_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..f1828d52f0f3a72b095ee1b349129365fd10f949 --- /dev/null +++ b/code/using_GO/pla/T17_1_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# 
Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T17_1_gblup_variances_all=rep(list(list()),cycles) 
+T17_1_gblup_prediction_all=rep(list(list()),cycles) +T17_1_gfblup_variances_all=rep(list(list()),cycles) +T17_1_gfblup_prediction_all=rep(list(list()),cycles) +T17_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T17_1...") + y=pheno_df_pla$T17_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T17_1_gblup_variances_all[[r]]<-var + T17_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T17_1_gblup_variances_all[[r]]<-list() + T17_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends 
+ } + T17_1_gfblup_variances_all[[r]]<-var + T17_1_gfblup_prediction_all[[r]]<-pred + T17_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T17_1_gblup_variances_all,"T17_1_gblup_variances_all_1_1000.rds") +saveRDS(T17_1_gblup_prediction_all,"T17_1_gblup_prediction_all_1_1000.rds") +saveRDS(T17_1_gfblup_variances_all,"T17_1_gfblup_variances_all_1_1000.rds") +saveRDS(T17_1_gfblup_prediction_all,"T17_1_gfblup_prediction_all_1_1000.rds") +saveRDS(T17_1_gfblup_validate_all,"T17_1_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T17_1_2001_3000.R b/code/using_GO/pla/T17_1_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..3fa741e7e584028cef23ced0c1f288df4189d52f --- /dev/null +++ b/code/using_GO/pla/T17_1_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') 
# Load data ----
message("Loading data...")

# Genotype, accession and phenotype inputs.
# load() restores whatever objects were saved in each .Rdata file.
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO priors covering all GO terms (7432 each, per the original notes).
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# GO priors restricted to terms with at least one marker
# (7297 each, excluding those GO terms with zero markers).
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T17_1_gblup_variances_all=rep(list(list()),cycles) +T17_1_gblup_prediction_all=rep(list(list()),cycles) +T17_1_gfblup_variances_all=rep(list(list()),cycles) +T17_1_gfblup_prediction_all=rep(list(list()),cycles) +T17_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T17_1...") + y=pheno_df_pla$T17_1 + fm <- y ~ 1*y + 
X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T17_1_gblup_variances_all[[r]]<-var + T17_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T17_1_gblup_variances_all[[r]]<-list() + T17_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T17_1_gfblup_variances_all[[r]]<-var + T17_1_gfblup_prediction_all[[r]]<-pred + T17_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T17_1_gblup_variances_all,"T17_1_gblup_variances_all_2001_3000.rds") +saveRDS(T17_1_gblup_prediction_all,"T17_1_gblup_prediction_all_2001_3000.rds") +saveRDS(T17_1_gfblup_variances_all,"T17_1_gfblup_variances_all_2001_3000.rds") +saveRDS(T17_1_gfblup_prediction_all,"T17_1_gfblup_prediction_all_2001_3000.rds") +saveRDS(T17_1_gfblup_validate_all,"T17_1_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T17_1_3001_4000.R 
b/code/using_GO/pla/T17_1_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..c780745bf674c043b321d0e7512019b4862e31f1 --- /dev/null +++ b/code/using_GO/pla/T17_1_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
# Phenotype objects; load() restores whatever was saved in each .Rdata file.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO priors covering all GO terms (7432 each, per the original notes).
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# GO priors restricted to terms with at least one marker
# (7297 each, excluding those GO terms with zero markers).
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# Filter ----
# Keep GO terms that have at least one marker, then take the slice of terms
# (3001..4000) handled by this job.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
kept_terms <- names(go_all_markers_filtered)
GF_filtered <- GF[kept_terms][3001:4000]
rGF_filtered <- rGF[kept_terms][3001:4000]
###########################################################################################
# Repeated cross-validation of GBLUP and GFBLUP with greml() for trait T17_1.
# Results are collected per cycle and saved to the *_3001_4000.rds files below.
cycles <- 10
#gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
# Validation sets read from the GBLUP output directory; entry r is used in cycle r.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle. A slot keeps its empty list() when the fit of that cycle fails.
T17_1_gblup_variances_all <- rep(list(list()), cycles)
T17_1_gblup_prediction_all <- rep(list(list()), cycles)
T17_1_gfblup_variances_all <- rep(list(list()), cycles)
T17_1_gfblup_prediction_all <- rep(list(list()), cycles)
T17_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The former time-seeded set.seed()/sample() fold drawing and its empty for loop
  # were removed: `validate` is rebuilt from the stored sets, so they had no effect.
  # Original fold layout, kept for reference:
  #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds)
  #validate[,i]=which(folds_index==i)
  # NOTE(review): byrow = TRUE reproduces the stored folds only if they were
  # flattened row-wise -- confirm against the script that wrote the file.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T17_1...")
  y <- pheno_df_pla$T17_1
  # NOTE(review): the response also appears on the right-hand side of this
  # formula -- confirm that the resulting X is the intended design matrix.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed before either is stored (same as before). The old
  # handler assigned inside its own closure, which did nothing and hid the error;
  # the failure is now reported and the preallocated list() slots are kept.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("T17_1 GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T17_1_gblup_variances_all[[r]] <- gblup_fit$variances
    T17_1_gblup_prediction_all[[r]] <- gblup_fit$prediction
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Per-set results, named like GF_filtered; entries stay list() when a fit fails.
  go_var <- rep(list(list()), length(GF_filtered))
  go_pred <- rep(list(list()), length(GF_filtered))
  names(go_var) <- names(GF_filtered)
  names(go_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: k-th entry of GF_filtered plus k-th entry of rGF_filtered.
    go_grm <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      go_var[[k]] <- greml(y = y, X = X, GRM = go_grm, ncores = 1)
      go_pred[[k]] <- greml(y = y, X = X, GRM = go_grm, validate = validate, ncores = 1)
    }, error = function(e) {
      message("T17_1 GFBLUP failed for GO set #", k, " (", names(GF_filtered)[k],
              ") in cycle ", r, ": ", conditionMessage(e))
    })
  }
  T17_1_gfblup_variances_all[[r]] <- go_var
  T17_1_gfblup_prediction_all[[r]] <- go_pred
  T17_1_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T17_1_gblup_variances_all, "T17_1_gblup_variances_all_3001_4000.rds")
saveRDS(T17_1_gblup_prediction_all, "T17_1_gblup_prediction_all_3001_4000.rds")
saveRDS(T17_1_gfblup_variances_all, "T17_1_gfblup_variances_all_3001_4000.rds")
saveRDS(T17_1_gfblup_prediction_all, "T17_1_gfblup_prediction_all_3001_4000.rds")
saveRDS(T17_1_gfblup_validate_all, "T17_1_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T17_1_4001_5000.R b/code/using_GO/pla/T17_1_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..b21d59183cd203b06541ff8d4caaa5803ed81da2
# --- /dev/null
# +++ b/code/using_GO/pla/T17_1_4001_5000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# All relative output paths (the saveRDS calls) resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
# load() restores objects under their saved names; readRDS() results are bound here.
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Keep only GO terms with at least one marker, then take the slice handled by this script.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]

###########################################################################################
# Repeated cross-validation of GBLUP and GFBLUP with greml() for trait T17_1.
cycles <- 10
#gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
# Validation sets read from the GBLUP output directory; entry r is used in cycle r.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle. A slot keeps its empty list() when the fit of that cycle fails.
T17_1_gblup_variances_all <- rep(list(list()), cycles)
T17_1_gblup_prediction_all <- rep(list(list()), cycles)
T17_1_gfblup_variances_all <- rep(list(list()), cycles)
T17_1_gfblup_prediction_all <- rep(list(list()), cycles)
T17_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The former time-seeded set.seed()/sample() fold drawing and its empty for loop
  # were removed: `validate` is rebuilt from the stored sets, so they had no effect.
  # Original fold layout, kept for reference:
  #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds)
  #validate[,i]=which(folds_index==i)
  # NOTE(review): byrow = TRUE reproduces the stored folds only if they were
  # flattened row-wise -- confirm against the script that wrote the file.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T17_1...")
  y <- pheno_df_pla$T17_1
  # NOTE(review): the response also appears on the right-hand side of this
  # formula -- confirm that the resulting X is the intended design matrix.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed before either is stored (same as before). The old
  # handler assigned inside its own closure, which did nothing and hid the error;
  # the failure is now reported and the preallocated list() slots are kept.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("T17_1 GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T17_1_gblup_variances_all[[r]] <- gblup_fit$variances
    T17_1_gblup_prediction_all[[r]] <- gblup_fit$prediction
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Per-set results, named like GF_filtered; entries stay list() when a fit fails.
  go_var <- rep(list(list()), length(GF_filtered))
  go_pred <- rep(list(list()), length(GF_filtered))
  names(go_var) <- names(GF_filtered)
  names(go_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: k-th entry of GF_filtered plus k-th entry of rGF_filtered.
    go_grm <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      go_var[[k]] <- greml(y = y, X = X, GRM = go_grm, ncores = 1)
      go_pred[[k]] <- greml(y = y, X = X, GRM = go_grm, validate = validate, ncores = 1)
    }, error = function(e) {
      message("T17_1 GFBLUP failed for GO set #", k, " (", names(GF_filtered)[k],
              ") in cycle ", r, ": ", conditionMessage(e))
    })
  }
  T17_1_gfblup_variances_all[[r]] <- go_var
  T17_1_gfblup_prediction_all[[r]] <- go_pred
  T17_1_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T17_1_gblup_variances_all, "T17_1_gblup_variances_all_4001_5000.rds")
saveRDS(T17_1_gblup_prediction_all, "T17_1_gblup_prediction_all_4001_5000.rds")
# Remaining GFBLUP outputs of the 4001_5000 run, written to the working directory.
saveRDS(object = T17_1_gfblup_variances_all, file = "T17_1_gfblup_variances_all_4001_5000.rds")
saveRDS(object = T17_1_gfblup_prediction_all, file = "T17_1_gfblup_prediction_all_4001_5000.rds")
saveRDS(object = T17_1_gfblup_validate_all, file = "T17_1_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T17_1_5001_6000.R b/code/using_GO/pla/T17_1_5001_6000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..d3b15f22a6bdb336db32732eb2734847532ccffb
# --- /dev/null
# +++ b/code/using_GO/pla/T17_1_5001_6000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Remove whitespace at the start and end of each string in x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Complement of %in%.
`%not_in%` <- purrr::negate(`%in%`)
# TRUE for each name in mypkg that is listed in the first column of installed.packages().
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

# Relative output paths used by saveRDS() resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds"
)
accessions <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds"
)
# Input objects for the 5001_6000 run. load() restores objects under the names
# stored in the .Rdata files; readRDS() results are bound explicitly.
all_nucl_genes_bed <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds"
)
ath_all_new_maf_ldpruned_map <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds"
)
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term inputs; the trailing numbers are the author's recorded counts.
go_all_genes_number <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds"
) #7432
go_all_markers_number <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds"
) #7432
go_all_markers <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds"
) #7432
markerSets <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds"
) #7297 excluding those go terms with zero markers
setsGF <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds"
) #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds"
) #7297 excluding those go terms with zero markers
nsets <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds"
) #7297 excluding those go terms with zero markers
nsnps <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds"
) #7297 excluding those go terms with zero markers
GF <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds"
) #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
#7297 excluding those go terms with zero markers (count noted for the object loaded on the preceding line)
########################################################################################
#filter
# Keep only GO terms with at least one marker, then take the slice handled by this script.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]

###########################################################################################
# Repeated cross-validation of GBLUP and GFBLUP with greml() for trait T17_1.
cycles <- 10
#gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
# Validation sets read from the GBLUP output directory; entry r is used in cycle r.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle. A slot keeps its empty list() when the fit of that cycle fails.
T17_1_gblup_variances_all <- rep(list(list()), cycles)
T17_1_gblup_prediction_all <- rep(list(list()), cycles)
T17_1_gfblup_variances_all <- rep(list(list()), cycles)
T17_1_gfblup_prediction_all <- rep(list(list()), cycles)
T17_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The former time-seeded set.seed()/sample() fold drawing and its empty for loop
  # were removed: `validate` is rebuilt from the stored sets, so they had no effect.
  # Original fold layout, kept for reference:
  #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds)
  #validate[,i]=which(folds_index==i)
  # NOTE(review): byrow = TRUE reproduces the stored folds only if they were
  # flattened row-wise -- confirm against the script that wrote the file.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T17_1...")
  y <- pheno_df_pla$T17_1
  # NOTE(review): the response also appears on the right-hand side of this
  # formula -- confirm that the resulting X is the intended design matrix.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed before either is stored (same as before). The old
  # handler assigned inside its own closure, which did nothing and hid the error;
  # the failure is now reported and the preallocated list() slots are kept.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("T17_1 GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T17_1_gblup_variances_all[[r]] <- gblup_fit$variances
    T17_1_gblup_prediction_all[[r]] <- gblup_fit$prediction
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Per-set results, named like GF_filtered; entries stay list() when a fit fails.
  go_var <- rep(list(list()), length(GF_filtered))
  go_pred <- rep(list(list()), length(GF_filtered))
  names(go_var) <- names(GF_filtered)
  names(go_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: k-th entry of GF_filtered plus k-th entry of rGF_filtered.
    go_grm <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      go_var[[k]] <- greml(y = y, X = X, GRM = go_grm, ncores = 1)
      go_pred[[k]] <- greml(y = y, X = X, GRM = go_grm, validate = validate, ncores = 1)
    }, error = function(e) {
      message("T17_1 GFBLUP failed for GO set #", k, " (", names(GF_filtered)[k],
              ") in cycle ", r, ": ", conditionMessage(e))
    })
  }
  T17_1_gfblup_variances_all[[r]] <- go_var
  T17_1_gfblup_prediction_all[[r]] <- go_pred
  T17_1_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T17_1_gblup_variances_all, "T17_1_gblup_variances_all_5001_6000.rds")
saveRDS(T17_1_gblup_prediction_all, "T17_1_gblup_prediction_all_5001_6000.rds")
saveRDS(T17_1_gfblup_variances_all, "T17_1_gfblup_variances_all_5001_6000.rds")
saveRDS(T17_1_gfblup_prediction_all, "T17_1_gfblup_prediction_all_5001_6000.rds")
saveRDS(T17_1_gfblup_validate_all, "T17_1_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T17_1_6001_7297.R b/code/using_GO/pla/T17_1_6001_7297.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..d99c2155dd83a56994305485405223ace5a2363d
# --- /dev/null
# +++ b/code/using_GO/pla/T17_1_6001_7297.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# All relative output paths (the saveRDS calls) resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
# load() restores objects under their saved names; readRDS() results are bound here.
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term inputs for the 6001_7297 run; the trailing numbers are the author's recorded counts.
go_all_genes_number <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds"
) #7432
go_all_markers_number <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds"
) #7432
go_all_markers <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds"
) #7432
markerSets <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds"
) #7297 excluding those go terms with zero markers
setsGF <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds"
) #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds"
) #7297 excluding those go terms with zero markers
nsets <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds"
) #7297 excluding those go terms with zero markers
nsnps <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds"
) #7297 excluding those go terms with zero markers
GF <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds"
) #7297 excluding those go terms with zero markers
rGF <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds"
) #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Select the GO terms whose marker count is positive, then the 6001:7297 slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[6001:7297]
rGF_filtered <- rGF_filtered[6001:7297]

###########################################################################################
# Number of cross-validation repetitions.
cycles <- 10
#gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
# Repeated cross-validation of GBLUP and GFBLUP with greml() for trait T17_1.
# `cycles` is set on the line preceding this block.
# Validation sets read from the GBLUP output directory; entry r is used in cycle r.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle. A slot keeps its empty list() when the fit of that cycle fails.
T17_1_gblup_variances_all <- rep(list(list()), cycles)
T17_1_gblup_prediction_all <- rep(list(list()), cycles)
T17_1_gfblup_variances_all <- rep(list(list()), cycles)
T17_1_gfblup_prediction_all <- rep(list(list()), cycles)
T17_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The former time-seeded set.seed()/sample() fold drawing and its empty for loop
  # were removed: `validate` is rebuilt from the stored sets, so they had no effect.
  # Original fold layout, kept for reference:
  #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds)
  #validate[,i]=which(folds_index==i)
  # NOTE(review): byrow = TRUE reproduces the stored folds only if they were
  # flattened row-wise -- confirm against the script that wrote the file.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T17_1...")
  y <- pheno_df_pla$T17_1
  # NOTE(review): the response also appears on the right-hand side of this
  # formula -- confirm that the resulting X is the intended design matrix.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed before either is stored (same as before). The old
  # handler assigned inside its own closure, which did nothing and hid the error;
  # the failure is now reported and the preallocated list() slots are kept.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("T17_1 GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T17_1_gblup_variances_all[[r]] <- gblup_fit$variances
    T17_1_gblup_prediction_all[[r]] <- gblup_fit$prediction
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Per-set results, named like GF_filtered; entries stay list() when a fit fails.
  go_var <- rep(list(list()), length(GF_filtered))
  go_pred <- rep(list(list()), length(GF_filtered))
  names(go_var) <- names(GF_filtered)
  names(go_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: k-th entry of GF_filtered plus k-th entry of rGF_filtered.
    go_grm <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      go_var[[k]] <- greml(y = y, X = X, GRM = go_grm, ncores = 1)
      go_pred[[k]] <- greml(y = y, X = X, GRM = go_grm, validate = validate, ncores = 1)
    }, error = function(e) {
      message("T17_1 GFBLUP failed for GO set #", k, " (", names(GF_filtered)[k],
              ") in cycle ", r, ": ", conditionMessage(e))
    })
  }
  T17_1_gfblup_variances_all[[r]] <- go_var
  T17_1_gfblup_prediction_all[[r]] <- go_pred
  T17_1_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T17_1_gblup_variances_all, "T17_1_gblup_variances_all_6001_7297.rds")
saveRDS(T17_1_gblup_prediction_all, "T17_1_gblup_prediction_all_6001_7297.rds")
saveRDS(T17_1_gfblup_variances_all, "T17_1_gfblup_variances_all_6001_7297.rds")
saveRDS(T17_1_gfblup_prediction_all, "T17_1_gfblup_prediction_all_6001_7297.rds")
saveRDS(T17_1_gfblup_validate_all, "T17_1_gfblup_validate_all_6001_7297.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T17_2_1001_2000.R b/code/using_GO/pla/T17_2_1001_2000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..c9ab35722f314847504755dd666f7c7ccbc7706b
# --- /dev/null
# +++ b/code/using_GO/pla/T17_2_1001_2000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# All relative output paths (the saveRDS calls) resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
# load() restores objects under their saved names; readRDS() results are bound here.
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
#7297 excluding those go terms with zero markers (count noted for the object loaded on the preceding line)
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Keep only GO terms with at least one marker, then take the slice handled by this script.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1001:2000]
rGF_filtered <- rGF_filtered[1001:2000]

###########################################################################################
# Repeated cross-validation of GBLUP and GFBLUP with greml() for trait T17_2.
cycles <- 10
#gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
# Validation sets read from the GBLUP output directory; entry r is used in cycle r.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle. A slot keeps its empty list() when the fit of that cycle fails.
T17_2_gblup_variances_all <- rep(list(list()), cycles)
T17_2_gblup_prediction_all <- rep(list(list()), cycles)
T17_2_gfblup_variances_all <- rep(list(list()), cycles)
T17_2_gfblup_prediction_all <- rep(list(list()), cycles)
T17_2_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The former time-seeded set.seed()/sample() fold drawing and its empty for loop
  # were removed: `validate` is rebuilt from the stored sets, so they had no effect.
  # Original fold layout, kept for reference:
  #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds)
  #validate[,i]=which(folds_index==i)
  # NOTE(review): byrow = TRUE reproduces the stored folds only if they were
  # flattened row-wise -- confirm against the script that wrote the file.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T17_2...")
  y <- pheno_df_pla$T17_2
  # NOTE(review): the response also appears on the right-hand side of this
  # formula -- confirm that the resulting X is the intended design matrix.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed before either is stored (same as before). The old
  # handler assigned inside its own closure, which did nothing and hid the error;
  # the failure is now reported and the preallocated list() slots are kept.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("T17_2 GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T17_2_gblup_variances_all[[r]] <- gblup_fit$variances
    T17_2_gblup_prediction_all[[r]] <- gblup_fit$prediction
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Per-set results, named like GF_filtered; entries stay list() when a fit fails.
  go_var <- rep(list(list()), length(GF_filtered))
  go_pred <- rep(list(list()), length(GF_filtered))
  names(go_var) <- names(GF_filtered)
  names(go_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: k-th entry of GF_filtered plus k-th entry of rGF_filtered.
    go_grm <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      go_var[[k]] <- greml(y = y, X = X, GRM = go_grm, ncores = 1)
      go_pred[[k]] <- greml(y = y, X = X, GRM = go_grm, validate = validate, ncores = 1)
    }, error = function(e) {
      message("T17_2 GFBLUP failed for GO set #", k, " (", names(GF_filtered)[k],
              ") in cycle ", r, ": ", conditionMessage(e))
    })
  }
  T17_2_gfblup_variances_all[[r]] <- go_var
  T17_2_gfblup_prediction_all[[r]] <- go_pred
  T17_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T17_2_gblup_variances_all, "T17_2_gblup_variances_all_1001_2000.rds")
saveRDS(T17_2_gblup_prediction_all, "T17_2_gblup_prediction_all_1001_2000.rds")
saveRDS(T17_2_gfblup_variances_all, "T17_2_gfblup_variances_all_1001_2000.rds")
saveRDS(T17_2_gfblup_prediction_all, "T17_2_gfblup_prediction_all_1001_2000.rds")
# Last output of the 1001_2000 run: the validation sets used in each cycle.
saveRDS(object = T17_2_gfblup_validate_all, file = "T17_2_gfblup_validate_all_1001_2000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T17_2_1_1000.R b/code/using_GO/pla/T17_2_1_1000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..21c36380f0332689760fe944626ffcb2d4312bbe
# --- /dev/null
# +++ b/code/using_GO/pla/T17_2_1_1000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Remove whitespace at the start and end of each string in x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Complement of %in%.
`%not_in%` <- purrr::negate(`%in%`)
# TRUE for each name in mypkg that is listed in the first column of installed.packages().
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

# Relative output paths used by saveRDS() resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds"
)
accessions <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds"
)
all_nucl_genes_bed <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds"
)
ath_all_new_maf_ldpruned_map <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds"
)
# Input objects for the 1_1000 run. load() restores objects under the names
# stored in the .Rdata files; readRDS() results are bound explicitly.
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term inputs; the trailing numbers are the author's recorded counts.
go_all_genes_number <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds"
) #7432
go_all_markers_number <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds"
) #7432
go_all_markers <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds"
) #7432
markerSets <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds"
) #7297 excluding those go terms with zero markers
setsGF <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds"
) #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds"
) #7297 excluding those go terms with zero markers
nsets <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds"
) #7297 excluding those go terms with zero markers
nsnps <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds"
) #7297 excluding those go terms with zero markers
GF <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds"
) #7297 excluding those go terms with zero markers
rGF <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds"
) #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Number of entries of go_all_genes_number whose marker count is positive.
n <- length(go_all_genes_number[go_all_markers_number > 0])
# Run section of the T17_2 script for entries 1-1000 of the filtered GO-term
# GRM lists: repeated cross-validated GBLUP and GFBLUP fits with greml(),
# followed by saving the five result lists as .rds files.

# Keep only GO terms that have at least one marker, then take this script's slice.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]

###########################################################################################
cycles <- 10
# Validation sets are read from the file written by the separate GBLUP run.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T17_2_gblup_variances_all <- rep(list(list()), cycles)
T17_2_gblup_prediction_all <- rep(list(list()), cycles)
T17_2_gfblup_variances_all <- rep(list(list()), cycles)
T17_2_gfblup_prediction_all <- rep(list(list()), cycles)
T17_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design do not change between cycles.
# The original formula `y ~ 1*y` listed the response on the right-hand side;
# `y ~ 1` requests the intercept-only design explicitly.
y <- pheno_df_pla$T17_2
X <- model.matrix(y ~ 1)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): unlist() flattens column-wise while byrow = TRUE refills
  # row-wise; if gblup_validate[[r]] stores one fold per column, the columns
  # built here are a rearranged partition, not the saved folds - confirm intent.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T17_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Results are stored only when both fits succeed. Assignments made inside an
  # error handler are local to the handler, so the handler only reports the
  # failure and the pre-initialised empty slots are left in place.
  tryCatch(
    {
      gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
      T17_2_gblup_variances_all[[r]] <- gblup_var
      T17_2_gblup_prediction_all[[r]] <- gblup_pred
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    }
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  set_var <- rep(list(list()), length(GF_filtered))
  set_pred <- rep(list(list()), length(GF_filtered))
  names(set_var) <- names(GF_filtered)
  names(set_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the k-th GF matrix plus the matching rGF matrix.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch(
      {
        set_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
        set_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      },
      error = function(e) {
        message(
          "GFBLUP failed in cycle ", r, ", GO# ", k,
          " (", names(GF_filtered)[k], "): ", conditionMessage(e)
        )
      }
    )
  }
  T17_2_gfblup_variances_all[[r]] <- set_var
  T17_2_gfblup_prediction_all[[r]] <- set_pred
  T17_2_gfblup_validate_all[[r]] <- list(validate)
}

# Output files are written relative to the current working directory.
saveRDS(T17_2_gblup_variances_all, "T17_2_gblup_variances_all_1_1000.rds")
saveRDS(T17_2_gblup_prediction_all, "T17_2_gblup_prediction_all_1_1000.rds")
saveRDS(T17_2_gfblup_variances_all, "T17_2_gfblup_variances_all_1_1000.rds")
saveRDS(T17_2_gfblup_prediction_all, "T17_2_gfblup_prediction_all_1_1000.rds")
saveRDS(T17_2_gfblup_validate_all, "T17_2_gfblup_validate_all_1_1000.rds")
# code/using_GO/pla/T17_2_2001_3000.R
# Repeated cross-validated GBLUP and GFBLUP fits (qgg::greml) for phenotype
# column T17_2 of pheno_df_pla, using entries 2001-3000 of the filtered GO-term
# GRM lists. Writes five .rds result files into the working directory set below.

############################# FUNCTIONS ##############################################
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# All relative output paths at the end of the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# qgg was installed from GitHub (psoerensen/qgg) via devtools, with backports
# installed first; the packages below must already be available.
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
# NOTE(review): several objects read here are never referenced later in this
# script; they are kept because the load() calls restore objects by name and
# their contents are not visible from here - confirm before trimming.
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
# The objects below: 7297, excluding those GO terms with zero markers.
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# Keep only GO terms that have at least one marker, then take this script's slice.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[2001:3000]
rGF_filtered <- rGF_filtered[2001:3000]

###########################################################################################
cycles <- 10
# Validation sets are read from the file written by the separate GBLUP run.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T17_2_gblup_variances_all <- rep(list(list()), cycles)
T17_2_gblup_prediction_all <- rep(list(list()), cycles)
T17_2_gfblup_variances_all <- rep(list(list()), cycles)
T17_2_gfblup_prediction_all <- rep(list(list()), cycles)
T17_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design do not change between cycles.
# The original formula `y ~ 1*y` listed the response on the right-hand side;
# `y ~ 1` requests the intercept-only design explicitly.
y <- pheno_df_pla$T17_2
X <- model.matrix(y ~ 1)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): unlist() flattens column-wise while byrow = TRUE refills
  # row-wise; if gblup_validate[[r]] stores one fold per column, the columns
  # built here are a rearranged partition, not the saved folds - confirm intent.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T17_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Results are stored only when both fits succeed. Assignments made inside an
  # error handler are local to the handler, so the handler only reports the
  # failure and the pre-initialised empty slots are left in place.
  tryCatch(
    {
      gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
      T17_2_gblup_variances_all[[r]] <- gblup_var
      T17_2_gblup_prediction_all[[r]] <- gblup_pred
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    }
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  set_var <- rep(list(list()), length(GF_filtered))
  set_pred <- rep(list(list()), length(GF_filtered))
  names(set_var) <- names(GF_filtered)
  names(set_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the k-th GF matrix plus the matching rGF matrix.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch(
      {
        set_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
        set_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      },
      error = function(e) {
        message(
          "GFBLUP failed in cycle ", r, ", GO# ", k,
          " (", names(GF_filtered)[k], "): ", conditionMessage(e)
        )
      }
    )
  }
  T17_2_gfblup_variances_all[[r]] <- set_var
  T17_2_gfblup_prediction_all[[r]] <- set_pred
  T17_2_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T17_2_gblup_variances_all, "T17_2_gblup_variances_all_2001_3000.rds")
saveRDS(T17_2_gblup_prediction_all, "T17_2_gblup_prediction_all_2001_3000.rds")
saveRDS(T17_2_gfblup_variances_all, "T17_2_gfblup_variances_all_2001_3000.rds")
saveRDS(T17_2_gfblup_prediction_all, "T17_2_gfblup_prediction_all_2001_3000.rds")
saveRDS(T17_2_gfblup_validate_all, "T17_2_gfblup_validate_all_2001_3000.rds")
# code/using_GO/pla/T17_2_3001_4000.R
# Repeated cross-validated GBLUP and GFBLUP fits (qgg::greml) for phenotype
# column T17_2 of pheno_df_pla, using entries 3001-4000 of the filtered GO-term
# GRM lists. Writes five .rds result files into the working directory set below.

############################# FUNCTIONS ##############################################
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# All relative output paths at the end of the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# qgg was installed from GitHub (psoerensen/qgg) via devtools, with backports
# installed first; the packages below must already be available.
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
# NOTE(review): several objects read here are never referenced later in this
# script; they are kept because the load() calls restore objects by name and
# their contents are not visible from here - confirm before trimming.
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
# The objects below: 7297, excluding those GO terms with zero markers.
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# Keep only GO terms that have at least one marker, then take this script's slice.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]

###########################################################################################
cycles <- 10
# Validation sets are read from the file written by the separate GBLUP run.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T17_2_gblup_variances_all <- rep(list(list()), cycles)
T17_2_gblup_prediction_all <- rep(list(list()), cycles)
T17_2_gfblup_variances_all <- rep(list(list()), cycles)
T17_2_gfblup_prediction_all <- rep(list(list()), cycles)
T17_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design do not change between cycles.
# The original formula `y ~ 1*y` listed the response on the right-hand side;
# `y ~ 1` requests the intercept-only design explicitly.
y <- pheno_df_pla$T17_2
X <- model.matrix(y ~ 1)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): unlist() flattens column-wise while byrow = TRUE refills
  # row-wise; if gblup_validate[[r]] stores one fold per column, the columns
  # built here are a rearranged partition, not the saved folds - confirm intent.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T17_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Results are stored only when both fits succeed. Assignments made inside an
  # error handler are local to the handler, so the handler only reports the
  # failure and the pre-initialised empty slots are left in place.
  tryCatch(
    {
      gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
      T17_2_gblup_variances_all[[r]] <- gblup_var
      T17_2_gblup_prediction_all[[r]] <- gblup_pred
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    }
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  set_var <- rep(list(list()), length(GF_filtered))
  set_pred <- rep(list(list()), length(GF_filtered))
  names(set_var) <- names(GF_filtered)
  names(set_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the k-th GF matrix plus the matching rGF matrix.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch(
      {
        set_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
        set_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      },
      error = function(e) {
        message(
          "GFBLUP failed in cycle ", r, ", GO# ", k,
          " (", names(GF_filtered)[k], "): ", conditionMessage(e)
        )
      }
    )
  }
  T17_2_gfblup_variances_all[[r]] <- set_var
  T17_2_gfblup_prediction_all[[r]] <- set_pred
  T17_2_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T17_2_gblup_variances_all, "T17_2_gblup_variances_all_3001_4000.rds")
saveRDS(T17_2_gblup_prediction_all, "T17_2_gblup_prediction_all_3001_4000.rds")
saveRDS(T17_2_gfblup_variances_all, "T17_2_gfblup_variances_all_3001_4000.rds")
saveRDS(T17_2_gfblup_prediction_all, "T17_2_gfblup_prediction_all_3001_4000.rds")
saveRDS(T17_2_gfblup_validate_all, "T17_2_gfblup_validate_all_3001_4000.rds")
# code/using_GO/pla/T17_2_4001_5000.R
# Repeated cross-validated GBLUP and GFBLUP fits (qgg::greml) for phenotype
# column T17_2 of pheno_df_pla, using entries 4001-5000 of the filtered GO-term
# GRM lists. Writes five .rds result files into the working directory set below.

############################# FUNCTIONS ##############################################
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# All relative output paths at the end of the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# qgg was installed from GitHub (psoerensen/qgg) via devtools, with backports
# installed first; the packages below must already be available.
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
# NOTE(review): several objects read here are never referenced later in this
# script; they are kept because the load() calls restore objects by name and
# their contents are not visible from here - confirm before trimming.
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
# The objects below: 7297, excluding those GO terms with zero markers.
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# Keep only GO terms that have at least one marker, then take this script's slice.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]

###########################################################################################
cycles <- 10
# Validation sets are read from the file written by the separate GBLUP run.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T17_2_gblup_variances_all <- rep(list(list()), cycles)
T17_2_gblup_prediction_all <- rep(list(list()), cycles)
T17_2_gfblup_variances_all <- rep(list(list()), cycles)
T17_2_gfblup_prediction_all <- rep(list(list()), cycles)
T17_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design do not change between cycles.
# The original formula `y ~ 1*y` listed the response on the right-hand side;
# `y ~ 1` requests the intercept-only design explicitly.
y <- pheno_df_pla$T17_2
X <- model.matrix(y ~ 1)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): unlist() flattens column-wise while byrow = TRUE refills
  # row-wise; if gblup_validate[[r]] stores one fold per column, the columns
  # built here are a rearranged partition, not the saved folds - confirm intent.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T17_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Results are stored only when both fits succeed. Assignments made inside an
  # error handler are local to the handler, so the handler only reports the
  # failure and the pre-initialised empty slots are left in place.
  tryCatch(
    {
      gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
      T17_2_gblup_variances_all[[r]] <- gblup_var
      T17_2_gblup_prediction_all[[r]] <- gblup_pred
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    }
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  set_var <- rep(list(list()), length(GF_filtered))
  set_pred <- rep(list(list()), length(GF_filtered))
  names(set_var) <- names(GF_filtered)
  names(set_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the k-th GF matrix plus the matching rGF matrix.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch(
      {
        set_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
        set_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      },
      error = function(e) {
        message(
          "GFBLUP failed in cycle ", r, ", GO# ", k,
          " (", names(GF_filtered)[k], "): ", conditionMessage(e)
        )
      }
    )
  }
  T17_2_gfblup_variances_all[[r]] <- set_var
  T17_2_gfblup_prediction_all[[r]] <- set_pred
  T17_2_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T17_2_gblup_variances_all, "T17_2_gblup_variances_all_4001_5000.rds")
saveRDS(T17_2_gblup_prediction_all, "T17_2_gblup_prediction_all_4001_5000.rds")
saveRDS(T17_2_gfblup_variances_all, "T17_2_gfblup_variances_all_4001_5000.rds")
saveRDS(T17_2_gfblup_prediction_all, "T17_2_gfblup_prediction_all_4001_5000.rds")
saveRDS(T17_2_gfblup_validate_all, "T17_2_gfblup_validate_all_4001_5000.rds")
# code/using_GO/pla/T17_2_5001_6000.R
# Repeated cross-validated GBLUP and GFBLUP fits (qgg::greml) for phenotype
# column T17_2 of pheno_df_pla, using entries 5001-6000 of the filtered GO-term
# GRM lists. Writes five .rds result files into the working directory set below.

############################# FUNCTIONS ##############################################
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# All relative output paths at the end of the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# qgg was installed from GitHub (psoerensen/qgg) via devtools, with backports
# installed first; the packages below must already be available.
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
# NOTE(review): several objects read here are never referenced later in this
# script; they are kept because the load() calls restore objects by name and
# their contents are not visible from here - confirm before trimming.
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
# The objects below: 7297, excluding those GO terms with zero markers.
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# Keep only GO terms that have at least one marker, then take this script's slice.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]

###########################################################################################
cycles <- 10
# Validation sets are read from the file written by the separate GBLUP run.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T17_2_gblup_variances_all <- rep(list(list()), cycles)
T17_2_gblup_prediction_all <- rep(list(list()), cycles)
T17_2_gfblup_variances_all <- rep(list(list()), cycles)
T17_2_gfblup_prediction_all <- rep(list(list()), cycles)
T17_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design do not change between cycles.
# The original formula `y ~ 1*y` listed the response on the right-hand side;
# `y ~ 1` requests the intercept-only design explicitly.
y <- pheno_df_pla$T17_2
X <- model.matrix(y ~ 1)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): unlist() flattens column-wise while byrow = TRUE refills
  # row-wise; if gblup_validate[[r]] stores one fold per column, the columns
  # built here are a rearranged partition, not the saved folds - confirm intent.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T17_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Results are stored only when both fits succeed. Assignments made inside an
  # error handler are local to the handler, so the handler only reports the
  # failure and the pre-initialised empty slots are left in place.
  tryCatch(
    {
      gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
      T17_2_gblup_variances_all[[r]] <- gblup_var
      T17_2_gblup_prediction_all[[r]] <- gblup_pred
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    }
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  set_var <- rep(list(list()), length(GF_filtered))
  set_pred <- rep(list(list()), length(GF_filtered))
  names(set_var) <- names(GF_filtered)
  names(set_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the k-th GF matrix plus the matching rGF matrix.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch(
      {
        set_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
        set_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      },
      error = function(e) {
        message(
          "GFBLUP failed in cycle ", r, ", GO# ", k,
          " (", names(GF_filtered)[k], "): ", conditionMessage(e)
        )
      }
    )
  }
  T17_2_gfblup_variances_all[[r]] <- set_var
  T17_2_gfblup_prediction_all[[r]] <- set_pred
  T17_2_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T17_2_gblup_variances_all, "T17_2_gblup_variances_all_5001_6000.rds")
saveRDS(T17_2_gblup_prediction_all, "T17_2_gblup_prediction_all_5001_6000.rds")
saveRDS(T17_2_gfblup_variances_all, "T17_2_gfblup_variances_all_5001_6000.rds")
saveRDS(T17_2_gfblup_prediction_all, "T17_2_gfblup_prediction_all_5001_6000.rds")
saveRDS(T17_2_gfblup_validate_all, "T17_2_gfblup_validate_all_5001_6000.rds")
# code/using_GO/pla/T17_2_6001_7297.R
# Repeated cross-validated GBLUP and GFBLUP fits (qgg::greml) for phenotype
# column T17_2 of pheno_df_pla, using entries 6001-7297 of the filtered GO-term
# GRM lists. Writes five .rds result files into the working directory set below.

############################# FUNCTIONS ##############################################
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# All relative output paths at the end of the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# qgg was installed from GitHub (psoerensen/qgg) via devtools, with backports
# installed first; the packages below must already be available.
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
# NOTE(review): several objects read here are never referenced later in this
# script; they are kept because the load() calls restore objects by name and
# their contents are not visible from here - confirm before trimming.
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
# The objects below: 7297, excluding those GO terms with zero markers.
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# Keep only GO terms that have at least one marker, then take this script's slice.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[6001:7297]
rGF_filtered <- rGF_filtered[6001:7297]

###########################################################################################
cycles <- 10
# Validation sets are read from the file written by the separate GBLUP run.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T17_2_gblup_variances_all <- rep(list(list()), cycles)
T17_2_gblup_prediction_all <- rep(list(list()), cycles)
T17_2_gfblup_variances_all <- rep(list(list()), cycles)
T17_2_gfblup_prediction_all <- rep(list(list()), cycles)
T17_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design do not change between cycles.
# The original formula `y ~ 1*y` listed the response on the right-hand side;
# `y ~ 1` requests the intercept-only design explicitly.
y <- pheno_df_pla$T17_2
X <- model.matrix(y ~ 1)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): unlist() flattens column-wise while byrow = TRUE refills
  # row-wise; if gblup_validate[[r]] stores one fold per column, the columns
  # built here are a rearranged partition, not the saved folds - confirm intent.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T17_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Results are stored only when both fits succeed. Assignments made inside an
  # error handler are local to the handler, so the handler only reports the
  # failure and the pre-initialised empty slots are left in place.
  tryCatch(
    {
      gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
      T17_2_gblup_variances_all[[r]] <- gblup_var
      T17_2_gblup_prediction_all[[r]] <- gblup_pred
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    }
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  set_var <- rep(list(list()), length(GF_filtered))
  set_pred <- rep(list(list()), length(GF_filtered))
  names(set_var) <- names(GF_filtered)
  names(set_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the k-th GF matrix plus the matching rGF matrix.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch(
      {
        set_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
        set_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      },
      error = function(e) {
        message(
          "GFBLUP failed in cycle ", r, ", GO# ", k,
          " (", names(GF_filtered)[k], "): ", conditionMessage(e)
        )
      }
    )
  }
  T17_2_gfblup_variances_all[[r]] <- set_var
  T17_2_gfblup_prediction_all[[r]] <- set_pred
  T17_2_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T17_2_gblup_variances_all, "T17_2_gblup_variances_all_6001_7297.rds")
saveRDS(T17_2_gblup_prediction_all, "T17_2_gblup_prediction_all_6001_7297.rds")
saveRDS(T17_2_gfblup_variances_all, "T17_2_gfblup_variances_all_6001_7297.rds")
saveRDS(T17_2_gfblup_prediction_all, "T17_2_gfblup_prediction_all_6001_7297.rds")
saveRDS(T17_2_gfblup_validate_all, "T17_2_gfblup_validate_all_6001_7297.rds")
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") 
+go_all_genes_number <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+# Keep only the sets with at least one marker, then take this script's slice of them.
+n <- length(go_all_genes_number[go_all_markers_number>0])  # number of sets with at least one marker
+go_all_markers_filtered <- go_all_markers[go_all_markers_number>0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+GF_filtered <- GF_filtered[1001:2000]
+rGF_filtered <- rGF_filtered[1001:2000]
+
+###########################################################################################
+# Cross-validation setup. Folds are not drawn in this script: the validation sets stored in
+# gblup_validate_all.rds are read back and reused, one stored entry per cycle.
+cycles <- 10
+n_folds <- 8
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+
+T17_3_gblup_variances_all <- rep(list(list()),cycles)
+T17_3_gblup_prediction_all <- rep(list(list()),cycles)
+T17_3_gfblup_variances_all <- rep(list(list()),cycles)
+T17_3_gfblup_prediction_all <- rep(list(list()),cycles)
+T17_3_gfblup_validate_all <- rep(list(list()),cycles)
+
+# The phenotype and the design matrix are the same in every cycle, so they are built once.
+y <- pheno_df_pla$T17_3
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Validation sets stored for cycle r, laid out row-wise with round(nrow(pheno_df_pla)/n_folds) rows.
+  # NOTE(review): confirm that byrow=TRUE matches the layout used when the sets were saved.
+  validate <- matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T17_3...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits are returned from tryCatch() and stored afterwards: an assignment made inside
+  # the error handler only changes a copy local to that function.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G),ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  }) #try catch ends
+  T17_3_gblup_variances_all[[r]] <- gblup_fit$var
+  T17_3_gblup_prediction_all[[r]] <- gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One result slot per element of GF_filtered, pre-filled with an empty list.
+  gf_var <- rep(list(list()),length(GF_filtered))
+  gf_pred <- rep(list(list()),length(GF_filtered))
+  names(gf_var) <- names(GF_filtered)
+  names(gf_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k])
+    tryCatch({
+      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k,ncores=1)
+      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate,ncores=1)
+    }, error = function(e) {
+      # An unfilled slot keeps its empty list; the failure is reported instead of hidden.
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    }) #try catch ends
+  }
+  T17_3_gfblup_variances_all[[r]] <- gf_var
+  T17_3_gfblup_prediction_all[[r]] <- gf_pred
+  T17_3_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T17_3_gblup_variances_all, file = "T17_3_gblup_variances_all_1001_2000.rds")
+saveRDS(T17_3_gblup_prediction_all, file = "T17_3_gblup_prediction_all_1001_2000.rds")
+saveRDS(T17_3_gfblup_variances_all, file = "T17_3_gfblup_variances_all_1001_2000.rds")
+saveRDS(T17_3_gfblup_prediction_all, file = "T17_3_gfblup_prediction_all_1001_2000.rds")
+saveRDS(T17_3_gfblup_validate_all, file = "T17_3_gfblup_validate_all_1001_2000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T17_3_1_1000.R b/code/using_GO/pla/T17_3_1_1000.R
new file mode 100644
index 0000000000000000000000000000000000000000..dd3f119b3e52d27367a50fe2c38372390c736da5
--- /dev/null
+++ b/code/using_GO/pla/T17_3_1_1000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function(x) gsub("^\\s+|\\s+$", "", x)  # strip leading and trailing whitespace
+`%not_in%` <- purrr::negate(`%in%`)  # negation of %in%
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  mypkg %in% installed.packages()[, 1]
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")  # relative output files are written here
+# One-time installation of the dependencies (left disabled):
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")  # load() restores the saved objects under their stored names
+MAC_df <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+# Keep only the sets with at least one marker, then take this script's slice of them.
+n <- length(go_all_genes_number[go_all_markers_number>0])  # number of sets with at least one marker
+go_all_markers_filtered <- go_all_markers[go_all_markers_number>0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+GF_filtered <- GF_filtered[1:1000]
+rGF_filtered <- rGF_filtered[1:1000]
+
+###########################################################################################
+# Cross-validation setup. Folds are not drawn in this script: the validation sets stored in
+# gblup_validate_all.rds are read back and reused, one stored entry per cycle.
+cycles <- 10
+n_folds <- 8
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+
+T17_3_gblup_variances_all <- rep(list(list()),cycles)
+T17_3_gblup_prediction_all <- rep(list(list()),cycles)
+T17_3_gfblup_variances_all <- rep(list(list()),cycles)
+T17_3_gfblup_prediction_all <- rep(list(list()),cycles)
+T17_3_gfblup_validate_all <- rep(list(list()),cycles)
+
+# The phenotype and the design matrix are the same in every cycle, so they are built once.
+y <- pheno_df_pla$T17_3
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Validation sets stored for cycle r, laid out row-wise with round(nrow(pheno_df_pla)/n_folds) rows.
+  # NOTE(review): confirm that byrow=TRUE matches the layout used when the sets were saved.
+  validate <- matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T17_3...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits are returned from tryCatch() and stored afterwards: an assignment made inside
+  # the error handler only changes a copy local to that function.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G),ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  }) #try catch ends
+  T17_3_gblup_variances_all[[r]] <- gblup_fit$var
+  T17_3_gblup_prediction_all[[r]] <- gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One result slot per element of GF_filtered, pre-filled with an empty list.
+  gf_var <- rep(list(list()),length(GF_filtered))
+  gf_pred <- rep(list(list()),length(GF_filtered))
+  names(gf_var) <- names(GF_filtered)
+  names(gf_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k])
+    tryCatch({
+      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k,ncores=1)
+      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate,ncores=1)
+    }, error = function(e) {
+      # An unfilled slot keeps its empty list; the failure is reported instead of hidden.
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    }) #try catch ends
+  }
+  T17_3_gfblup_variances_all[[r]] <- gf_var
+  T17_3_gfblup_prediction_all[[r]] <- gf_pred
+  T17_3_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T17_3_gblup_variances_all, file = "T17_3_gblup_variances_all_1_1000.rds")
+saveRDS(T17_3_gblup_prediction_all, file = "T17_3_gblup_prediction_all_1_1000.rds")
+saveRDS(T17_3_gfblup_variances_all, file = "T17_3_gfblup_variances_all_1_1000.rds")
+saveRDS(T17_3_gfblup_prediction_all, file = "T17_3_gfblup_prediction_all_1_1000.rds")
+saveRDS(T17_3_gfblup_validate_all, file = "T17_3_gfblup_validate_all_1_1000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T17_3_2001_3000.R b/code/using_GO/pla/T17_3_2001_3000.R
new file mode 100644
index 0000000000000000000000000000000000000000..5896ef58a6c0dfb3333b37ad971a215f23f1d499
--- /dev/null
+++ b/code/using_GO/pla/T17_3_2001_3000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function(x) gsub("^\\s+|\\s+$", "", x)  # strip leading and trailing whitespace
+`%not_in%` <- purrr::negate(`%in%`)  # negation of %in%
+# TRUE when `mypkg` appears in the first column of installed.packages()
+is.installed <- function(mypkg)
+{
+  mypkg %in% installed.packages()[, 1]
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")  # relative output files are written here
+# One-time installation of the dependencies (left disabled):
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")  # load() restores the saved objects under their stored names
+MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")  # 7432
+go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")  # 7432
+go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")  # 7432
+markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")  # 7297, excluding the GO terms with zero markers
+setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")  # 7297, excluding the GO terms with zero markers
+rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")  # 7297, excluding the GO terms with zero markers
+nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")  # 7297, excluding the GO terms with zero markers
+nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")  # 7297, excluding the GO terms with zero markers
+GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")  # 7297, excluding the GO terms with zero markers
+rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")  # 7297, excluding the GO terms with zero markers
+########################################################################################
+# Keep only the sets that have at least one marker.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+GF_filtered <- GF_filtered[2001:3000]
+rGF_filtered <- rGF_filtered[2001:3000]
+
+###########################################################################################
+# Cross-validation setup. Folds are not drawn in this script: the validation sets stored in
+# gblup_validate_all.rds are read back and reused, one stored entry per cycle.
+cycles <- 10
+n_folds <- 8
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+
+T17_3_gblup_variances_all <- rep(list(list()),cycles)
+T17_3_gblup_prediction_all <- rep(list(list()),cycles)
+T17_3_gfblup_variances_all <- rep(list(list()),cycles)
+T17_3_gfblup_prediction_all <- rep(list(list()),cycles)
+T17_3_gfblup_validate_all <- rep(list(list()),cycles)
+
+# The phenotype and the design matrix are the same in every cycle, so they are built once.
+y <- pheno_df_pla$T17_3
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Validation sets stored for cycle r, laid out row-wise with round(nrow(pheno_df_pla)/n_folds) rows.
+  # NOTE(review): confirm that byrow=TRUE matches the layout used when the sets were saved.
+  validate <- matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T17_3...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits are returned from tryCatch() and stored afterwards: an assignment made inside
+  # the error handler only changes a copy local to that function.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G),ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  }) #try catch ends
+  T17_3_gblup_variances_all[[r]] <- gblup_fit$var
+  T17_3_gblup_prediction_all[[r]] <- gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One result slot per element of GF_filtered, pre-filled with an empty list.
+  gf_var <- rep(list(list()),length(GF_filtered))
+  gf_pred <- rep(list(list()),length(GF_filtered))
+  names(gf_var) <- names(GF_filtered)
+  names(gf_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k])
+    tryCatch({
+      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k,ncores=1)
+      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate,ncores=1)
+    }, error = function(e) {
+      # An unfilled slot keeps its empty list; the failure is reported instead of hidden.
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    }) #try catch ends
+  }
+  T17_3_gfblup_variances_all[[r]] <- gf_var
+  T17_3_gfblup_prediction_all[[r]] <- gf_pred
+  T17_3_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T17_3_gblup_variances_all, file = "T17_3_gblup_variances_all_2001_3000.rds")
+saveRDS(T17_3_gblup_prediction_all, file = "T17_3_gblup_prediction_all_2001_3000.rds")
+saveRDS(T17_3_gfblup_variances_all, file = "T17_3_gfblup_variances_all_2001_3000.rds")
+saveRDS(T17_3_gfblup_prediction_all, file = "T17_3_gfblup_prediction_all_2001_3000.rds")
+saveRDS(T17_3_gfblup_validate_all, file = "T17_3_gfblup_validate_all_2001_3000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T17_3_3001_4000.R b/code/using_GO/pla/T17_3_3001_4000.R
new file mode 100644
index 0000000000000000000000000000000000000000..d0e95d104bb676bfa3bedb228d76f739c5ca5acf
--- /dev/null
+++ b/code/using_GO/pla/T17_3_3001_4000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function(x) gsub("^\\s+|\\s+$", "", x)  # strip leading and trailing whitespace
+`%not_in%` <- purrr::negate(`%in%`)  # negation of %in%
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  mypkg %in% installed.packages()[, 1]
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")  # relative output files are written here
+# One-time installation of the dependencies (left disabled):
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")  # load() restores the saved objects under their stored names
+MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")  # 7432
+go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")  # 7432
+go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")  # 7432
+markerSets <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+# Keep only the sets with at least one marker, then take this script's slice of them.
+n <- length(go_all_genes_number[go_all_markers_number>0])  # number of sets with at least one marker
+go_all_markers_filtered <- go_all_markers[go_all_markers_number>0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+GF_filtered <- GF_filtered[3001:4000]
+rGF_filtered <- rGF_filtered[3001:4000]
+
+###########################################################################################
+# Cross-validation setup. Folds are not drawn in this script: the validation sets stored in
+# gblup_validate_all.rds are read back and reused, one stored entry per cycle.
+cycles <- 10
+n_folds <- 8
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+
+T17_3_gblup_variances_all <- rep(list(list()),cycles)
+T17_3_gblup_prediction_all <- rep(list(list()),cycles)
+T17_3_gfblup_variances_all <- rep(list(list()),cycles)
+T17_3_gfblup_prediction_all <- rep(list(list()),cycles)
+T17_3_gfblup_validate_all <- rep(list(list()),cycles)
+
+# The phenotype and the design matrix are the same in every cycle, so they are built once.
+y <- pheno_df_pla$T17_3
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Validation sets stored for cycle r, laid out row-wise with round(nrow(pheno_df_pla)/n_folds) rows.
+  # NOTE(review): confirm that byrow=TRUE matches the layout used when the sets were saved.
+  validate <- matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T17_3...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits are returned from tryCatch() and stored afterwards: an assignment made inside
+  # the error handler only changes a copy local to that function.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G),ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  }) #try catch ends
+  T17_3_gblup_variances_all[[r]] <- gblup_fit$var
+  T17_3_gblup_prediction_all[[r]] <- gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One result slot per element of GF_filtered, pre-filled with an empty list.
+  gf_var <- rep(list(list()),length(GF_filtered))
+  gf_pred <- rep(list(list()),length(GF_filtered))
+  names(gf_var) <- names(GF_filtered)
+  names(gf_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    grm_k <- c(GF_filtered[k],rGF_filtered[k])
+    tryCatch({
+      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k,ncores=1)
+      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate,ncores=1)
+    }, error = function(e) {
+      # An unfilled slot keeps its empty list; the failure is reported instead of hidden.
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    }) #try catch ends
+  }
+  T17_3_gfblup_variances_all[[r]] <- gf_var
+  T17_3_gfblup_prediction_all[[r]] <- gf_pred
+  T17_3_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T17_3_gblup_variances_all, file = "T17_3_gblup_variances_all_3001_4000.rds")
+saveRDS(T17_3_gblup_prediction_all, file = "T17_3_gblup_prediction_all_3001_4000.rds")
+saveRDS(T17_3_gfblup_variances_all, file = "T17_3_gfblup_variances_all_3001_4000.rds")
+saveRDS(T17_3_gfblup_prediction_all, file = "T17_3_gfblup_prediction_all_3001_4000.rds")
+saveRDS(T17_3_gfblup_validate_all, file = "T17_3_gfblup_validate_all_3001_4000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T17_3_4001_5000.R b/code/using_GO/pla/T17_3_4001_5000.R
new file mode 100644
index 0000000000000000000000000000000000000000..bf5c72de5975c76070712bf91bddddaea5466bb8
--- /dev/null
+++ b/code/using_GO/pla/T17_3_4001_5000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function(x) gsub("^\\s+|\\s+$", "", x)  # strip leading and trailing whitespace
+`%not_in%` <- purrr::negate(`%in%`)  # negation of %in%
+# TRUE when `mypkg` appears in the first column of installed.packages()
+is.installed <- function(mypkg)
+{
+  mypkg %in% installed.packages()[, 1]
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")  # relative output files are written here
+# One-time installation of the dependencies (left disabled):
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with 
zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[4001:5000]
+rGF_filtered<-rGF_filtered[4001:5000]
+
+###########################################################################################
+cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
+ gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T17_3_gblup_variances_all=rep(list(list()),cycles)
+T17_3_gblup_prediction_all=rep(list(list()),cycles)
+T17_3_gfblup_variances_all=rep(list(list()),cycles)
+T17_3_gfblup_prediction_all=rep(list(list()),cycles)
+T17_3_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # One cross-validation cycle: a single GBLUP fit on G, then one GFBLUP fit per
+  # GO term in GF_filtered (its GRM combined with the matching rGF_filtered one).
+  # Folds are not re-sampled here: the validation sets stored for cycle r in
+  # gblup_validate are reused for every model fitted in this cycle.
+  validate <- matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  # NOTE(review): byrow=TRUE refills the unlisted ids row-wise, so the columns
+  # equal the stored folds only if those were saved one fold per row; round()
+  # also presumes nrow(pheno_df_pla) is a multiple of n_folds -- confirm both.
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T17_3...")
+  y=pheno_df_pla$T17_3
+  fm <- y ~ 1*y # NOTE(review): the response y is also on the right-hand side; presumably an intercept-only design (y ~ 1) was meant -- confirm
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G),ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ",r,": ",conditionMessage(e)) # report the failure instead of hiding it
+    list(var = list(), pred = list()) # returned as the tryCatch value; assignments made inside a handler stay local to it
+  }) #try catch ends
+  T17_3_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+  T17_3_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    gfblup_fit <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1),
+           pred = greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ",k,": ",conditionMessage(e))
+      list(var = list(), pred = list())
+    }) #try catch ends
+    var[[k]] <- gfblup_fit[["var"]] # both slots are written together, so a variance fit
+    pred[[k]] <- gfblup_fit[["pred"]] # is never kept without its matching prediction
+  }
+  T17_3_gfblup_variances_all[[r]]<-var
+  T17_3_gfblup_prediction_all[[r]]<-pred
+  T17_3_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T17_3_gblup_variances_all,"T17_3_gblup_variances_all_4001_5000.rds")
+saveRDS(T17_3_gblup_prediction_all,"T17_3_gblup_prediction_all_4001_5000.rds")
+saveRDS(T17_3_gfblup_variances_all,"T17_3_gfblup_variances_all_4001_5000.rds")
+saveRDS(T17_3_gfblup_prediction_all,"T17_3_gfblup_prediction_all_4001_5000.rds")
+saveRDS(T17_3_gfblup_validate_all,"T17_3_gfblup_validate_all_4001_5000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T17_3_5001_6000.R b/code/using_GO/pla/T17_3_5001_6000.R
new file mode 100644
index 0000000000000000000000000000000000000000..5a8711545983e17d852f6f1732ac7f49a9aae826
--- /dev/null
+++ b/code/using_GO/pla/T17_3_5001_6000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + 
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T17_3_gblup_variances_all=rep(list(list()),cycles)
+T17_3_gblup_prediction_all=rep(list(list()),cycles)
+T17_3_gfblup_variances_all=rep(list(list()),cycles)
+T17_3_gfblup_prediction_all=rep(list(list()),cycles)
+T17_3_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # One cross-validation cycle: a single GBLUP fit on G, then one GFBLUP fit per
+  # GO term in GF_filtered (its GRM combined with the matching rGF_filtered one).
+  # Folds are not re-sampled here: the validation sets stored for cycle r in
+  # gblup_validate are reused for every model fitted in this cycle.
+  validate <- matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  # NOTE(review): byrow=TRUE refills the unlisted ids row-wise, so the columns
+  # equal the stored folds only if those were saved one fold per row; round()
+  # also presumes nrow(pheno_df_pla) is a multiple of n_folds -- confirm both.
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T17_3...")
+  y=pheno_df_pla$T17_3
+  fm <- y ~ 1*y # NOTE(review): the response y is also on the right-hand side; presumably an intercept-only design (y ~ 1) was meant -- confirm
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G),ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ",r,": ",conditionMessage(e)) # report the failure instead of hiding it
+    list(var = list(), pred = list()) # returned as the tryCatch value; assignments made inside a handler stay local to it
+  }) #try catch ends
+  T17_3_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+  T17_3_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    gfblup_fit <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1),
+           pred = greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ",k,": ",conditionMessage(e))
+      list(var = list(), pred = list())
+    }) #try catch ends
+    var[[k]] <- gfblup_fit[["var"]] # both slots are written together, so a variance fit
+    pred[[k]] <- gfblup_fit[["pred"]] # is never kept without its matching prediction
+  }
+  T17_3_gfblup_variances_all[[r]]<-var
+  T17_3_gfblup_prediction_all[[r]]<-pred
+  T17_3_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T17_3_gblup_variances_all,"T17_3_gblup_variances_all_5001_6000.rds")
+saveRDS(T17_3_gblup_prediction_all,"T17_3_gblup_prediction_all_5001_6000.rds")
+saveRDS(T17_3_gfblup_variances_all,"T17_3_gfblup_variances_all_5001_6000.rds")
+saveRDS(T17_3_gfblup_prediction_all,"T17_3_gfblup_prediction_all_5001_6000.rds")
+saveRDS(T17_3_gfblup_validate_all,"T17_3_gfblup_validate_all_5001_6000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T17_3_6001_7297.R b/code/using_GO/pla/T17_3_6001_7297.R
new file mode 100644
index 0000000000000000000000000000000000000000..e8732e5cc484d81a9c2eb36673de9bacbf4157ff
--- /dev/null
+++ b/code/using_GO/pla/T17_3_6001_7297.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+#
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
#7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[6001:7297]
+rGF_filtered<-rGF_filtered[6001:7297]
+
+###########################################################################################
+cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
+ gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T17_3_gblup_variances_all=rep(list(list()),cycles)
+T17_3_gblup_prediction_all=rep(list(list()),cycles)
+T17_3_gfblup_variances_all=rep(list(list()),cycles)
+T17_3_gfblup_prediction_all=rep(list(list()),cycles)
+T17_3_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # One cross-validation cycle: a single GBLUP fit on G, then one GFBLUP fit per
+  # GO term in GF_filtered (its GRM combined with the matching rGF_filtered one).
+  # Folds are not re-sampled here: the validation sets stored for cycle r in
+  # gblup_validate are reused for every model fitted in this cycle.
+  validate <- matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  # NOTE(review): byrow=TRUE refills the unlisted ids row-wise, so the columns
+  # equal the stored folds only if those were saved one fold per row; round()
+  # also presumes nrow(pheno_df_pla) is a multiple of n_folds -- confirm both.
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T17_3...")
+  y=pheno_df_pla$T17_3
+  fm <- y ~ 1*y # NOTE(review): the response y is also on the right-hand side; presumably an intercept-only design (y ~ 1) was meant -- confirm
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G),ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ",r,": ",conditionMessage(e)) # report the failure instead of hiding it
+    list(var = list(), pred = list()) # returned as the tryCatch value; assignments made inside a handler stay local to it
+  }) #try catch ends
+  T17_3_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+  T17_3_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    gfblup_fit <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1),
+           pred = greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ",k,": ",conditionMessage(e))
+      list(var = list(), pred = list())
+    }) #try catch ends
+    var[[k]] <- gfblup_fit[["var"]] # both slots are written together, so a variance fit
+    pred[[k]] <- gfblup_fit[["pred"]] # is never kept without its matching prediction
+  }
+  T17_3_gfblup_variances_all[[r]]<-var
+  T17_3_gfblup_prediction_all[[r]]<-pred
+  T17_3_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T17_3_gblup_variances_all,"T17_3_gblup_variances_all_6001_7297.rds")
+saveRDS(T17_3_gblup_prediction_all,"T17_3_gblup_prediction_all_6001_7297.rds")
+saveRDS(T17_3_gfblup_variances_all,"T17_3_gfblup_variances_all_6001_7297.rds")
+saveRDS(T17_3_gfblup_prediction_all,"T17_3_gfblup_prediction_all_6001_7297.rds")
+saveRDS(T17_3_gfblup_validate_all,"T17_3_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T17_4_1001_2000.R b/code/using_GO/pla/T17_4_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..19cd99b84d0c002f585e5e3eef0dbab14c8ce3ed --- /dev/null +++ b/code/using_GO/pla/T17_4_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers 
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[1001:2000]
+rGF_filtered<-rGF_filtered[1001:2000]
+
+###########################################################################################
+cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
+ gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T17_4_gblup_variances_all=rep(list(list()),cycles)
+T17_4_gblup_prediction_all=rep(list(list()),cycles)
+T17_4_gfblup_variances_all=rep(list(list()),cycles)
+T17_4_gfblup_prediction_all=rep(list(list()),cycles)
+T17_4_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # One cross-validation cycle: a single GBLUP fit on G, then one GFBLUP fit per
+  # GO term in GF_filtered (its GRM combined with the matching rGF_filtered one).
+  # Folds are not re-sampled here: the validation sets stored for cycle r in
+  # gblup_validate are reused for every model fitted in this cycle.
+  validate <- matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  # NOTE(review): byrow=TRUE refills the unlisted ids row-wise, so the columns
+  # equal the stored folds only if those were saved one fold per row; round()
+  # also presumes nrow(pheno_df_pla) is a multiple of n_folds -- confirm both.
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T17_4...")
+  y=pheno_df_pla$T17_4
+  fm <- y ~ 1*y # NOTE(review): the response y is also on the right-hand side; presumably an intercept-only design (y ~ 1) was meant -- confirm
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G),ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ",r,": ",conditionMessage(e)) # report the failure instead of hiding it
+    list(var = list(), pred = list()) # returned as the tryCatch value; assignments made inside a handler stay local to it
+  }) #try catch ends
+  T17_4_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+  T17_4_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    gfblup_fit <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1),
+           pred = greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ",k,": ",conditionMessage(e))
+      list(var = list(), pred = list())
+    }) #try catch ends
+    var[[k]] <- gfblup_fit[["var"]] # both slots are written together, so a variance fit
+    pred[[k]] <- gfblup_fit[["pred"]] # is never kept without its matching prediction
+  }
+  T17_4_gfblup_variances_all[[r]]<-var
+  T17_4_gfblup_prediction_all[[r]]<-pred
+  T17_4_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T17_4_gblup_variances_all,"T17_4_gblup_variances_all_1001_2000.rds")
+saveRDS(T17_4_gblup_prediction_all,"T17_4_gblup_prediction_all_1001_2000.rds")
+saveRDS(T17_4_gfblup_variances_all,"T17_4_gfblup_variances_all_1001_2000.rds")
+saveRDS(T17_4_gfblup_prediction_all,"T17_4_gfblup_prediction_all_1001_2000.rds")
+saveRDS(T17_4_gfblup_validate_all,"T17_4_gfblup_validate_all_1001_2000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T17_4_1_1000.R b/code/using_GO/pla/T17_4_1_1000.R
new file mode 100644
index 0000000000000000000000000000000000000000..cc879269c04903d47b6cd88cf9d3b48a11329d40
--- /dev/null
+++ b/code/using_GO/pla/T17_4_1_1000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+#
Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T17_4_gblup_variances_all=rep(list(list()),cycles) 
+T17_4_gblup_prediction_all=rep(list(list()),cycles)
+T17_4_gfblup_variances_all=rep(list(list()),cycles)
+T17_4_gfblup_prediction_all=rep(list(list()),cycles)
+T17_4_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # One cross-validation cycle: a single GBLUP fit on G, then one GFBLUP fit per
+  # GO term in GF_filtered (its GRM combined with the matching rGF_filtered one).
+  # Folds are not re-sampled here: the validation sets stored for cycle r in
+  # gblup_validate are reused for every model fitted in this cycle.
+  validate <- matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  # NOTE(review): byrow=TRUE refills the unlisted ids row-wise, so the columns
+  # equal the stored folds only if those were saved one fold per row; round()
+  # also presumes nrow(pheno_df_pla) is a multiple of n_folds -- confirm both.
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T17_4...")
+  y=pheno_df_pla$T17_4
+  fm <- y ~ 1*y # NOTE(review): the response y is also on the right-hand side; presumably an intercept-only design (y ~ 1) was meant -- confirm
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G=G),ncores=1),
+         pred = greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ",r,": ",conditionMessage(e)) # report the failure instead of hiding it
+    list(var = list(), pred = list()) # returned as the tryCatch value; assignments made inside a handler stay local to it
+  }) #try catch ends
+  T17_4_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+  T17_4_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    gfblup_fit <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1),
+           pred = greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ",k,": ",conditionMessage(e))
+      list(var = list(), pred = list())
+    }) #try catch ends
+    var[[k]] <- gfblup_fit[["var"]] # both slots are written together, so a variance fit
+    pred[[k]] <- gfblup_fit[["pred"]] # is never kept without its matching prediction
+  }
+  T17_4_gfblup_variances_all[[r]]<-var
+  T17_4_gfblup_prediction_all[[r]]<-pred
+  T17_4_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T17_4_gblup_variances_all,"T17_4_gblup_variances_all_1_1000.rds")
+saveRDS(T17_4_gblup_prediction_all,"T17_4_gblup_prediction_all_1_1000.rds")
+saveRDS(T17_4_gfblup_variances_all,"T17_4_gfblup_variances_all_1_1000.rds")
+saveRDS(T17_4_gfblup_prediction_all,"T17_4_gfblup_prediction_all_1_1000.rds")
+saveRDS(T17_4_gfblup_validate_all,"T17_4_gfblup_validate_all_1_1000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T17_4_2001_3000.R b/code/using_GO/pla/T17_4_2001_3000.R
new file mode 100644
index 0000000000000000000000000000000000000000..5eb4e629649f655a262e0ceeebdbc74d2e484676
--- /dev/null
+++ b/code/using_GO/pla/T17_4_2001_3000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
# Load data ----
message("Loading data...")
# readRDS() results are bound to the names on the left; load() restores the
# objects stored in each .Rdata file under their saved names (presumably `G`
# and `pheno_df_pla`, which the later model code uses - confirm).
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term priors. Original size notes: 7432 for the first three objects,
# 7297 (excluding GO terms with zero markers) for the remaining ones.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
# Remaining GO-term priors. Original size note: 7297, excluding GO terms with
# zero markers.
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# Filter ----
# Keep GO terms with at least one marker, then take this script's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][2001:3000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][2001:3000]

# Cross-validation configuration ----
cycles <- 10
# Stored validation sets, read from the GBLUP output directory.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Per-cycle result containers; each slot stays an empty list until filled.
T17_4_gblup_variances_all <- rep(list(list()), cycles)
T17_4_gblup_prediction_all <- rep(list(list()), cycles)
T17_4_gfblup_variances_all <- rep(list(list()), cycles)
T17_4_gfblup_prediction_all <- rep(list(list()), cycles)
T17_4_gfblup_validate_all <- rep(list(list()), cycles)

# Repeated cross-validation: for every cycle fit a GBLUP model on G and one
# GFBLUP model (GF[k] + rGF[k]) per GO term in GF_filtered.
# greml() is presumably qgg::greml (qgg is attached by these scripts) - confirm.
for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))

  # Validation sets are taken from the stored gblup_validate object and
  # reshaped row-wise; presumably one column per fold - confirm.
  # The former time-seeded sample() of fold labels and the empty fold loop
  # were removed: their results were never used.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message("T17_4...")
  y <- pheno_df_pla$T17_4
  # NOTE(review): the response `y` also appears on the right-hand side of the
  # formula; presumably an intercept-only design (y ~ 1) is intended - confirm
  # what model.matrix() returns here.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)

  # GBLUP ----
  # Assignments made inside an error-handler function only change the
  # handler's local copy, so both fits are returned from tryCatch() and stored
  # afterwards. On failure the error is reported and both slots become list().
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(
        y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1
      )
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(variances = list(), prediction = list())
    }
  )
  T17_4_gblup_variances_all[[r]] <- gblup_fit$variances
  T17_4_gblup_prediction_all[[r]] <- gblup_fit$prediction

  # GFBLUP ----
  gfblup_variances <- rep(list(list()), length(GF_filtered))
  gfblup_prediction <- rep(list(list()), length(GF_filtered))
  names(gfblup_variances) <- names(GF_filtered)
  names(gfblup_prediction) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Both fits succeed together or the GO term is recorded as list() for
    # both, so a variance fit is never paired with a missing prediction.
    gfblup_fit <- tryCatch(
      list(
        variances = greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1
        ),
        prediction = greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]),
          validate = validate, ncores = 1
        )
      ),
      error = function(e) {
        message(
          "GFBLUP failed for GO# ", k, " in cycle ", r, ": ",
          conditionMessage(e)
        )
        list(variances = list(), prediction = list())
      }
    )
    gfblup_variances[[k]] <- gfblup_fit$variances
    gfblup_prediction[[k]] <- gfblup_fit$prediction
  }
  T17_4_gfblup_variances_all[[r]] <- gfblup_variances
  T17_4_gfblup_prediction_all[[r]] <- gfblup_prediction
  T17_4_gfblup_validate_all[[r]] <- list(validate)
}

# Written relative to the working directory set by setwd() earlier.
saveRDS(T17_4_gblup_variances_all, "T17_4_gblup_variances_all_2001_3000.rds")
saveRDS(T17_4_gblup_prediction_all, "T17_4_gblup_prediction_all_2001_3000.rds")
saveRDS(T17_4_gfblup_variances_all, "T17_4_gfblup_variances_all_2001_3000.rds")
saveRDS(T17_4_gfblup_prediction_all, "T17_4_gfblup_prediction_all_2001_3000.rds")
saveRDS(T17_4_gfblup_validate_all, "T17_4_gfblup_validate_all_2001_3000.rds")

# ---- start of the next script's patch header (kept verbatim as a comment) ----
# diff --git a/code/using_GO/pla/T17_4_3001_4000.R
# ---- remainder of the patch header (kept verbatim as comments) ----
# b/code/using_GO/pla/T17_4_3001_4000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..edf1dfb5a3e1441a682408595a8d1067eafe291c
# --- /dev/null
# +++ b/code/using_GO/pla/T17_4_3001_4000.R
# @@ -0,0 +1,131 @@

# Functions ----
# Strip leading and trailing whitespace from each element of `x`.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Disabled one-time installation steps (kept for reference):
# if (!is.installed("backports")) install.packages("backports")
# if (!is.installed("devtools")) install.packages("devtools")
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# Load data ----
message("Loading data...")
# readRDS() results are bound to the names on the left; load() restores the
# objects stored in the .Rdata file under their saved names (presumably `G`
# - confirm).
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
# load() restores the objects stored in each .Rdata file under their saved
# names (presumably including `pheno_df_pla` - confirm).
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term priors. Original size notes: 7432 for the first three objects,
# 7297 (excluding GO terms with zero markers) for the remaining ones.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# Filter ----
# Keep GO terms with at least one marker, then take this script's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][3001:4000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][3001:4000]
# Cross-validation configuration ----
cycles <- 10
# Stored validation sets, read from the GBLUP output directory.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Per-cycle result containers; each slot stays an empty list until filled.
T17_4_gblup_variances_all <- rep(list(list()), cycles)
T17_4_gblup_prediction_all <- rep(list(list()), cycles)
T17_4_gfblup_variances_all <- rep(list(list()), cycles)
T17_4_gfblup_prediction_all <- rep(list(list()), cycles)
T17_4_gfblup_validate_all <- rep(list(list()), cycles)

# Repeated cross-validation: for every cycle fit a GBLUP model on G and one
# GFBLUP model (GF[k] + rGF[k]) per GO term in GF_filtered.
# greml() is presumably qgg::greml (qgg is attached by these scripts) - confirm.
for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))

  # Validation sets are taken from the stored gblup_validate object and
  # reshaped row-wise; presumably one column per fold - confirm.
  # The former time-seeded sample() of fold labels and the empty fold loop
  # were removed: their results were never used.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message("T17_4...")
  y <- pheno_df_pla$T17_4
  # NOTE(review): the response `y` also appears on the right-hand side of the
  # formula; presumably an intercept-only design (y ~ 1) is intended - confirm
  # what model.matrix() returns here.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)

  # GBLUP ----
  # Assignments made inside an error-handler function only change the
  # handler's local copy, so both fits are returned from tryCatch() and stored
  # afterwards. On failure the error is reported and both slots become list().
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(
        y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1
      )
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(variances = list(), prediction = list())
    }
  )
  T17_4_gblup_variances_all[[r]] <- gblup_fit$variances
  T17_4_gblup_prediction_all[[r]] <- gblup_fit$prediction

  # GFBLUP ----
  gfblup_variances <- rep(list(list()), length(GF_filtered))
  gfblup_prediction <- rep(list(list()), length(GF_filtered))
  names(gfblup_variances) <- names(GF_filtered)
  names(gfblup_prediction) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Both fits succeed together or the GO term is recorded as list() for
    # both, so a variance fit is never paired with a missing prediction.
    gfblup_fit <- tryCatch(
      list(
        variances = greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1
        ),
        prediction = greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]),
          validate = validate, ncores = 1
        )
      ),
      error = function(e) {
        message(
          "GFBLUP failed for GO# ", k, " in cycle ", r, ": ",
          conditionMessage(e)
        )
        list(variances = list(), prediction = list())
      }
    )
    gfblup_variances[[k]] <- gfblup_fit$variances
    gfblup_prediction[[k]] <- gfblup_fit$prediction
  }
  T17_4_gfblup_variances_all[[r]] <- gfblup_variances
  T17_4_gfblup_prediction_all[[r]] <- gfblup_prediction
  T17_4_gfblup_validate_all[[r]] <- list(validate)
}

# Written relative to the working directory set by setwd() earlier.
saveRDS(T17_4_gblup_variances_all, "T17_4_gblup_variances_all_3001_4000.rds")
saveRDS(T17_4_gblup_prediction_all, "T17_4_gblup_prediction_all_3001_4000.rds")
saveRDS(T17_4_gfblup_variances_all, "T17_4_gfblup_variances_all_3001_4000.rds")
saveRDS(T17_4_gfblup_prediction_all, "T17_4_gfblup_prediction_all_3001_4000.rds")
saveRDS(T17_4_gfblup_validate_all, "T17_4_gfblup_validate_all_3001_4000.rds")

# ---- patch header of the next script (kept verbatim as comments) ----
# diff --git a/code/using_GO/pla/T17_4_4001_5000.R b/code/using_GO/pla/T17_4_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..9f1b98f6380306ac85416ee3f119da2a76e2194e
# --- /dev/null
# +++ b/code/using_GO/pla/T17_4_4001_5000.R
# @@ -0,0 +1,131 @@

# Functions ----
# Strip leading and trailing whitespace from each element of `x`.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Disabled one-time installation steps (kept for reference):
# if (!is.installed("backports")) install.packages("backports")
# if (!is.installed("devtools")) install.packages("devtools")
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# Load data ----
message("Loading data...")
# readRDS() results are bound to the names on the left; load() restores the
# objects stored in each .Rdata file under their saved names (presumably `G`
# and `pheno_df_pla`, which the later model code uses - confirm).
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term priors. Original size notes: 7432 for the first three objects,
# 7297 (excluding GO terms with zero markers) for markerSets.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
# Remaining GO-term priors. Original size note: 7297, excluding GO terms with
# zero markers.
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# Filter ----
# Keep GO terms with at least one marker, then take this script's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][4001:5000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][4001:5000]

# Cross-validation configuration ----
cycles <- 10
# Stored validation sets, read from the GBLUP output directory.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Per-cycle result containers; each slot stays an empty list until filled.
T17_4_gblup_variances_all <- rep(list(list()), cycles)
T17_4_gblup_prediction_all <- rep(list(list()), cycles)
T17_4_gfblup_variances_all <- rep(list(list()), cycles)
T17_4_gfblup_prediction_all <- rep(list(list()), cycles)
T17_4_gfblup_validate_all <- rep(list(list()), cycles)

# Repeated cross-validation: for every cycle fit a GBLUP model on G and one
# GFBLUP model (GF[k] + rGF[k]) per GO term in GF_filtered.
# greml() is presumably qgg::greml (qgg is attached by these scripts) - confirm.
for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))

  # Validation sets are taken from the stored gblup_validate object and
  # reshaped row-wise; presumably one column per fold - confirm.
  # The former time-seeded sample() of fold labels and the empty fold loop
  # were removed: their results were never used.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message("T17_4...")
  y <- pheno_df_pla$T17_4
  # NOTE(review): the response `y` also appears on the right-hand side of the
  # formula; presumably an intercept-only design (y ~ 1) is intended - confirm
  # what model.matrix() returns here.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)

  # GBLUP ----
  # Assignments made inside an error-handler function only change the
  # handler's local copy, so both fits are returned from tryCatch() and stored
  # afterwards. On failure the error is reported and both slots become list().
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(
        y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1
      )
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(variances = list(), prediction = list())
    }
  )
  T17_4_gblup_variances_all[[r]] <- gblup_fit$variances
  T17_4_gblup_prediction_all[[r]] <- gblup_fit$prediction

  # GFBLUP ----
  gfblup_variances <- rep(list(list()), length(GF_filtered))
  gfblup_prediction <- rep(list(list()), length(GF_filtered))
  names(gfblup_variances) <- names(GF_filtered)
  names(gfblup_prediction) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Both fits succeed together or the GO term is recorded as list() for
    # both, so a variance fit is never paired with a missing prediction.
    gfblup_fit <- tryCatch(
      list(
        variances = greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1
        ),
        prediction = greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]),
          validate = validate, ncores = 1
        )
      ),
      error = function(e) {
        message(
          "GFBLUP failed for GO# ", k, " in cycle ", r, ": ",
          conditionMessage(e)
        )
        list(variances = list(), prediction = list())
      }
    )
    gfblup_variances[[k]] <- gfblup_fit$variances
    gfblup_prediction[[k]] <- gfblup_fit$prediction
  }
  T17_4_gfblup_variances_all[[r]] <- gfblup_variances
  T17_4_gfblup_prediction_all[[r]] <- gfblup_prediction
  T17_4_gfblup_validate_all[[r]] <- list(validate)
}

# Written relative to the working directory set by setwd() earlier.
saveRDS(T17_4_gblup_variances_all, "T17_4_gblup_variances_all_4001_5000.rds")
saveRDS(T17_4_gblup_prediction_all, "T17_4_gblup_prediction_all_4001_5000.rds")
# Remaining outputs of the 4001-5000 run, written relative to the working
# directory.
saveRDS(T17_4_gfblup_variances_all, "T17_4_gfblup_variances_all_4001_5000.rds")
saveRDS(T17_4_gfblup_prediction_all, "T17_4_gfblup_prediction_all_4001_5000.rds")
saveRDS(T17_4_gfblup_validate_all, "T17_4_gfblup_validate_all_4001_5000.rds")

# ---- patch header of the next script (kept verbatim as comments) ----
# diff --git a/code/using_GO/pla/T17_4_5001_6000.R b/code/using_GO/pla/T17_4_5001_6000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..d3f9d0a48b39811bab411a7f1f2edf97018121fb
# --- /dev/null
# +++ b/code/using_GO/pla/T17_4_5001_6000.R
# @@ -0,0 +1,131 @@

# Functions ----
# Strip leading and trailing whitespace from each element of `x`.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Disabled one-time installation steps (kept for reference):
# if (!is.installed("backports")) install.packages("backports")
# if (!is.installed("devtools")) install.packages("devtools")
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# Load data ----
message("Loading data...")
# readRDS() results are bound to the names on the left; load() restores the
# objects stored in each .Rdata file under their saved names (presumably `G`
# and `pheno_df_pla`, which the later model code uses - confirm).
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term priors. Original size notes: 7432 for the first three objects,
# 7297 (excluding GO terms with zero markers) for the remaining ones.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
# 7297 excluding those go terms with zero markers

# Filter ----
# Keep GO terms with at least one marker, then take this script's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][5001:6000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][5001:6000]

# Cross-validation configuration ----
cycles <- 10
# Stored validation sets, read from the GBLUP output directory.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Per-cycle result containers; each slot stays an empty list until filled.
T17_4_gblup_variances_all <- rep(list(list()), cycles)
T17_4_gblup_prediction_all <- rep(list(list()), cycles)
T17_4_gfblup_variances_all <- rep(list(list()), cycles)
T17_4_gfblup_prediction_all <- rep(list(list()), cycles)
T17_4_gfblup_validate_all <- rep(list(list()), cycles)

# Repeated cross-validation: for every cycle fit a GBLUP model on G and one
# GFBLUP model (GF[k] + rGF[k]) per GO term in GF_filtered.
# greml() is presumably qgg::greml (qgg is attached by these scripts) - confirm.
for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))

  # Validation sets are taken from the stored gblup_validate object and
  # reshaped row-wise; presumably one column per fold - confirm.
  # The former time-seeded sample() of fold labels and the empty fold loop
  # were removed: their results were never used.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message("T17_4...")
  y <- pheno_df_pla$T17_4
  # NOTE(review): the response `y` also appears on the right-hand side of the
  # formula; presumably an intercept-only design (y ~ 1) is intended - confirm
  # what model.matrix() returns here.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)

  # GBLUP ----
  # Assignments made inside an error-handler function only change the
  # handler's local copy, so both fits are returned from tryCatch() and stored
  # afterwards. On failure the error is reported and both slots become list().
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(
        y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1
      )
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(variances = list(), prediction = list())
    }
  )
  T17_4_gblup_variances_all[[r]] <- gblup_fit$variances
  T17_4_gblup_prediction_all[[r]] <- gblup_fit$prediction

  # GFBLUP ----
  gfblup_variances <- rep(list(list()), length(GF_filtered))
  gfblup_prediction <- rep(list(list()), length(GF_filtered))
  names(gfblup_variances) <- names(GF_filtered)
  names(gfblup_prediction) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Both fits succeed together or the GO term is recorded as list() for
    # both, so a variance fit is never paired with a missing prediction.
    gfblup_fit <- tryCatch(
      list(
        variances = greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1
        ),
        prediction = greml(
          y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]),
          validate = validate, ncores = 1
        )
      ),
      error = function(e) {
        message(
          "GFBLUP failed for GO# ", k, " in cycle ", r, ": ",
          conditionMessage(e)
        )
        list(variances = list(), prediction = list())
      }
    )
    gfblup_variances[[k]] <- gfblup_fit$variances
    gfblup_prediction[[k]] <- gfblup_fit$prediction
  }
  T17_4_gfblup_variances_all[[r]] <- gfblup_variances
  T17_4_gfblup_prediction_all[[r]] <- gfblup_prediction
  T17_4_gfblup_validate_all[[r]] <- list(validate)
}

# Written relative to the working directory set by setwd() earlier.
saveRDS(T17_4_gblup_variances_all, "T17_4_gblup_variances_all_5001_6000.rds")
saveRDS(T17_4_gblup_prediction_all, "T17_4_gblup_prediction_all_5001_6000.rds")
saveRDS(T17_4_gfblup_variances_all, "T17_4_gfblup_variances_all_5001_6000.rds")
saveRDS(T17_4_gfblup_prediction_all, "T17_4_gfblup_prediction_all_5001_6000.rds")
saveRDS(T17_4_gfblup_validate_all, "T17_4_gfblup_validate_all_5001_6000.rds")

# ---- patch header of the next script (kept verbatim as comments) ----
# diff --git a/code/using_GO/pla/T17_4_6001_7297.R b/code/using_GO/pla/T17_4_6001_7297.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..b773a9cc67a81a76f21316b775db75dc3f470fcd
# --- /dev/null
# +++ b/code/using_GO/pla/T17_4_6001_7297.R
# @@ -0,0 +1,131 @@

# Functions ----
# Strip leading and trailing whitespace from each element of `x`.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Disabled one-time installation steps (kept for reference):
# if (!is.installed("backports")) install.packages("backports")
# if (!is.installed("devtools")) install.packages("devtools")
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# Load data ----
message("Loading data...")
# readRDS() results are bound to the names on the left; load() restores the
# objects stored in each .Rdata file under their saved names (presumably `G`
# and `pheno_df_pla` - confirm).
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# ---- GO-term priors ----
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))     #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))               #7432
# The objects below: 7297 excluding those go terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))

# ---- Filter: keep GO terms that have at least one marker ----
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
# This script handles GO terms 6001..7297 of the filtered list.
GF_filtered <- GF_filtered[6001:7297]
rGF_filtered <- rGF_filtered[6001:7297]

# ---- Cross-validation setup ----
cycles <- 10
# Validation folds per cycle; presumably saved by the plain GBLUP run - verify.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
T17_4_gblup_variances_all <- rep(list(list()), cycles)
T17_4_gblup_prediction_all <- rep(list(list()), cycles)
T17_4_gfblup_variances_all <- rep(list(list()), cycles)
T17_4_gfblup_prediction_all <- rep(list(list()), cycles)
T17_4_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#",r,"..."))
  # Folds come from gblup_validate. The former time-seeded sample() draw was
  # never used (its only consumer was commented out), so it has been removed.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  message ("T17_4...")
  y <- pheno_df_pla$T17_4
  # Intercept-only fixed effects. The old formula `y ~ 1*y` put the response on
  # the right-hand side; R is expected to drop it with a warning, so this should
  # give the same design matrix without the warning - NOTE(review): confirm.
  X <- model.matrix(y ~ 1)

  # ---- GBLUP: single relationship matrix G ----
  # The handler returns the fallback value: assignments made inside a handler
  # function are local to it and never reach the result lists.
  gblup_fit <- tryCatch({
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    )
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    list(var = list(), pred = list())
  })
  T17_4_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T17_4_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  # ---- GFBLUP: one two-matrix model (GF[k], rGF[k]) per GO term ----
  go_variances <- rep(list(list()), length(GF_filtered))
  go_predictions <- rep(list(list()), length(GF_filtered))
  names(go_variances) <- names(GF_filtered)
  names(go_predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#",k,"..."))
    go_fit <- tryCatch({
      grm <- c(GF_filtered[k], rGF_filtered[k])
      list(
        var = greml(y = y, X = X, GRM = grm, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm, validate = validate, ncores = 1)
      )
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
      # Both results are reset together, as the original handler intended.
      list(var = list(), pred = list())
    })
    go_variances[[k]] <- go_fit[["var"]]
    go_predictions[[k]] <- go_fit[["pred"]]
  }
  T17_4_gfblup_variances_all[[r]] <- go_variances
  T17_4_gfblup_prediction_all[[r]] <- go_predictions
  T17_4_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T17_4_gblup_variances_all, "T17_4_gblup_variances_all_6001_7297.rds")
saveRDS(T17_4_gblup_prediction_all, "T17_4_gblup_prediction_all_6001_7297.rds")
saveRDS(T17_4_gfblup_variances_all, "T17_4_gfblup_variances_all_6001_7297.rds")
saveRDS(T17_4_gfblup_prediction_all, "T17_4_gfblup_prediction_all_6001_7297.rds")
saveRDS(T17_4_gfblup_validate_all, "T17_4_gfblup_validate_all_6001_7297.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T17_5_1001_2000.R b/code/using_GO/pla/T17_5_1001_2000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..6adad7610e8bf57a60a2576627ce06b530dcd96b
# --- /dev/null
# +++ b/code/using_GO/pla/T17_5_1001_2000.R
# @@ -0,0 +1,131 @@
# ---- Helper functions ----
# Strip leading and trailing whitespace.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%; base Negate() avoids needing purrr for this one operator.
`%not_in%` <- Negate(`%in%`)
# TRUE when package `mypkg` is listed by installed.packages().
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

# Relative file names (e.g. in saveRDS) resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time package installation, kept disabled for reference:
#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)

message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# load() restores objects under their saved names; presumably G and
# pheno_df_pla (both used below, never assigned here) come from these - verify.
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))

# ---- GO-term priors ----
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))     #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))               #7432
# The objects below: 7297 excluding those go terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))

# ---- Filter: keep GO terms that have at least one marker ----
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
# This script handles GO terms 1001..2000 of the filtered list.
GF_filtered <- GF_filtered[1001:2000]
rGF_filtered <- rGF_filtered[1001:2000]

# ---- Cross-validation setup ----
cycles <- 10
# Validation folds per cycle; presumably saved by the plain GBLUP run - verify.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
T17_5_gblup_variances_all <- rep(list(list()), cycles)
T17_5_gblup_prediction_all <- rep(list(list()), cycles)
T17_5_gfblup_variances_all <- rep(list(list()), cycles)
T17_5_gfblup_prediction_all <- rep(list(list()), cycles)
T17_5_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#",r,"..."))
  # Folds come from gblup_validate. The former time-seeded sample() draw was
  # never used (its only consumer was commented out), so it has been removed.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  message ("T17_5...")
  y <- pheno_df_pla$T17_5
  # Intercept-only fixed effects. The old formula `y ~ 1*y` put the response on
  # the right-hand side; R is expected to drop it with a warning, so this should
  # give the same design matrix without the warning - NOTE(review): confirm.
  X <- model.matrix(y ~ 1)

  # ---- GBLUP: single relationship matrix G ----
  # The handler returns the fallback value: assignments made inside a handler
  # function are local to it and never reach the result lists.
  gblup_fit <- tryCatch({
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    )
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    list(var = list(), pred = list())
  })
  T17_5_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T17_5_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  # ---- GFBLUP: one two-matrix model (GF[k], rGF[k]) per GO term ----
  go_variances <- rep(list(list()), length(GF_filtered))
  go_predictions <- rep(list(list()), length(GF_filtered))
  names(go_variances) <- names(GF_filtered)
  names(go_predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#",k,"..."))
    go_fit <- tryCatch({
      grm <- c(GF_filtered[k], rGF_filtered[k])
      list(
        var = greml(y = y, X = X, GRM = grm, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm, validate = validate, ncores = 1)
      )
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
      # Both results are reset together, as the original handler intended.
      list(var = list(), pred = list())
    })
    go_variances[[k]] <- go_fit[["var"]]
    go_predictions[[k]] <- go_fit[["pred"]]
  }
  T17_5_gfblup_variances_all[[r]] <- go_variances
  T17_5_gfblup_prediction_all[[r]] <- go_predictions
  T17_5_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T17_5_gblup_variances_all, "T17_5_gblup_variances_all_1001_2000.rds")
saveRDS(T17_5_gblup_prediction_all, "T17_5_gblup_prediction_all_1001_2000.rds")
saveRDS(T17_5_gfblup_variances_all, "T17_5_gfblup_variances_all_1001_2000.rds")
saveRDS(T17_5_gfblup_prediction_all, "T17_5_gfblup_prediction_all_1001_2000.rds")
saveRDS(T17_5_gfblup_validate_all, "T17_5_gfblup_validate_all_1001_2000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T17_5_1_1000.R b/code/using_GO/pla/T17_5_1_1000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..d423904cac8a53dcfaa8738834d8f3f5425c309f
# --- /dev/null
# +++ b/code/using_GO/pla/T17_5_1_1000.R
# @@ -0,0 +1,131 @@
# ---- Helper functions ----
# Strip leading and trailing whitespace.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%; base Negate() avoids needing purrr for this one operator.
`%not_in%` <- Negate(`%in%`)
# TRUE when package `mypkg` is listed by installed.packages().
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

# Relative file names (e.g. in saveRDS) resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time package installation, kept disabled for reference:
#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)

message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
W <- readRDS(file.path(data_dir, "W.rds"))
# load() restores objects under their saved names; presumably G and
# pheno_df_pla (both used later, never assigned here) come from these - verify.
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))

# ---- GO-term priors ----
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))     #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))               #7432
# The objects below: 7297 excluding those go terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))

# ---- Filter ----
# Number of GO terms that have at least one marker.
n <- length(go_all_genes_number[go_all_markers_number > 0])
# Keep GO terms that have at least one marker.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
# This script handles GO terms 1..1000 of the filtered list.
GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]

# ---- Cross-validation setup ----
cycles <- 10
# Validation folds per cycle; presumably saved by the plain GBLUP run - verify.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
T17_5_gblup_variances_all <- rep(list(list()), cycles)
T17_5_gblup_prediction_all <- rep(list(list()), cycles)
T17_5_gfblup_variances_all <- rep(list(list()), cycles)
T17_5_gfblup_prediction_all <- rep(list(list()), cycles)
T17_5_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#",r,"..."))
  # Folds come from gblup_validate. The former time-seeded sample() draw was
  # never used (its only consumer was commented out), so it has been removed.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  message ("T17_5...")
  y <- pheno_df_pla$T17_5
  # Intercept-only fixed effects. The old formula `y ~ 1*y` put the response on
  # the right-hand side; R is expected to drop it with a warning, so this should
  # give the same design matrix without the warning - NOTE(review): confirm.
  X <- model.matrix(y ~ 1)

  # ---- GBLUP: single relationship matrix G ----
  # The handler returns the fallback value: assignments made inside a handler
  # function are local to it and never reach the result lists.
  gblup_fit <- tryCatch({
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    )
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    list(var = list(), pred = list())
  })
  T17_5_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T17_5_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  # ---- GFBLUP: one two-matrix model (GF[k], rGF[k]) per GO term ----
  go_variances <- rep(list(list()), length(GF_filtered))
  go_predictions <- rep(list(list()), length(GF_filtered))
  names(go_variances) <- names(GF_filtered)
  names(go_predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#",k,"..."))
    go_fit <- tryCatch({
      grm <- c(GF_filtered[k], rGF_filtered[k])
      list(
        var = greml(y = y, X = X, GRM = grm, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm, validate = validate, ncores = 1)
      )
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
      # Both results are reset together, as the original handler intended.
      list(var = list(), pred = list())
    })
    go_variances[[k]] <- go_fit[["var"]]
    go_predictions[[k]] <- go_fit[["pred"]]
  }
  T17_5_gfblup_variances_all[[r]] <- go_variances
  T17_5_gfblup_prediction_all[[r]] <- go_predictions
  T17_5_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T17_5_gblup_variances_all, "T17_5_gblup_variances_all_1_1000.rds")
saveRDS(T17_5_gblup_prediction_all, "T17_5_gblup_prediction_all_1_1000.rds")
saveRDS(T17_5_gfblup_variances_all, "T17_5_gfblup_variances_all_1_1000.rds")
saveRDS(T17_5_gfblup_prediction_all, "T17_5_gfblup_prediction_all_1_1000.rds")
saveRDS(T17_5_gfblup_validate_all, "T17_5_gfblup_validate_all_1_1000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T17_5_2001_3000.R b/code/using_GO/pla/T17_5_2001_3000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..365c4cad47c0eb509e1b1246833ccbff669cff55
# --- /dev/null
# +++ b/code/using_GO/pla/T17_5_2001_3000.R
# @@ -0,0 +1,131 @@
# ---- Helper functions ----
# Strip leading and trailing whitespace.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%; base Negate() avoids needing purrr for this one operator.
`%not_in%` <- Negate(`%in%`)
# TRUE when package `mypkg` is listed by installed.packages().
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

# Relative file names (e.g. in saveRDS) resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time package installation, kept disabled for reference:
#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)

message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# load() restores objects under their saved names; presumably G and
# pheno_df_pla (both used later, never assigned here) come from these - verify.
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))

# ---- GO-term priors ----
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))     #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
# The objects below: 7297 excluding those go terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))

# ---- Filter: keep GO terms that have at least one marker ----
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
# This script handles GO terms 2001..3000 of the filtered list.
GF_filtered <- GF_filtered[2001:3000]
rGF_filtered <- rGF_filtered[2001:3000]

# ---- Cross-validation setup ----
cycles <- 10
# Validation folds per cycle; presumably saved by the plain GBLUP run - verify.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle, each starting as an empty list.
T17_5_gblup_variances_all <- rep(list(list()), cycles)
T17_5_gblup_prediction_all <- rep(list(list()), cycles)
T17_5_gfblup_variances_all <- rep(list(list()), cycles)
T17_5_gfblup_prediction_all <- rep(list(list()), cycles)
T17_5_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#",r,"..."))
  # Folds come from gblup_validate. The former time-seeded sample() draw was
  # never used (its only consumer was commented out), so it has been removed.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  message ("T17_5...")
  y <- pheno_df_pla$T17_5
  # Intercept-only fixed effects. The old formula `y ~ 1*y` put the response on
  # the right-hand side; R is expected to drop it with a warning, so this should
  # give the same design matrix without the warning - NOTE(review): confirm.
  X <- model.matrix(y ~ 1)

  # ---- GBLUP: single relationship matrix G ----
  # The handler returns the fallback value: assignments made inside a handler
  # function are local to it and never reach the result lists.
  gblup_fit <- tryCatch({
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    )
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    list(var = list(), pred = list())
  })
  T17_5_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T17_5_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  # ---- GFBLUP: one two-matrix model (GF[k], rGF[k]) per GO term ----
  go_variances <- rep(list(list()), length(GF_filtered))
  go_predictions <- rep(list(list()), length(GF_filtered))
  names(go_variances) <- names(GF_filtered)
  names(go_predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#",k,"..."))
    go_fit <- tryCatch({
      grm <- c(GF_filtered[k], rGF_filtered[k])
      list(
        var = greml(y = y, X = X, GRM = grm, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm, validate = validate, ncores = 1)
      )
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
      # Both results are reset together, as the original handler intended.
      list(var = list(), pred = list())
    })
    go_variances[[k]] <- go_fit[["var"]]
    go_predictions[[k]] <- go_fit[["pred"]]
  }
  T17_5_gfblup_variances_all[[r]] <- go_variances
  T17_5_gfblup_prediction_all[[r]] <- go_predictions
  T17_5_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T17_5_gblup_variances_all, "T17_5_gblup_variances_all_2001_3000.rds")
saveRDS(T17_5_gblup_prediction_all, "T17_5_gblup_prediction_all_2001_3000.rds")
saveRDS(T17_5_gfblup_variances_all, "T17_5_gfblup_variances_all_2001_3000.rds")
saveRDS(T17_5_gfblup_prediction_all, "T17_5_gfblup_prediction_all_2001_3000.rds")
saveRDS(T17_5_gfblup_validate_all, "T17_5_gfblup_validate_all_2001_3000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T17_5_3001_4000.R b/code/using_GO/pla/T17_5_3001_4000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..5a70ee43fd51f9c007d0710cc1bd3741c4e1c5fc
# --- /dev/null
# +++ b/code/using_GO/pla/T17_5_3001_4000.R
# @@ -0,0 +1,131 @@
# ---- Helper functions ----
# Strip leading and trailing whitespace.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%; base Negate() avoids needing purrr for this one operator.
`%not_in%` <- Negate(`%in%`)
# TRUE when package `mypkg` is listed by installed.packages().
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

# Relative file names (e.g. in saveRDS) resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time package installation, kept disabled for reference:
#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)

message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero 
markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T17_5_gblup_variances_all=rep(list(list()),cycles) +T17_5_gblup_prediction_all=rep(list(list()),cycles) +T17_5_gfblup_variances_all=rep(list(list()),cycles) +T17_5_gfblup_prediction_all=rep(list(list()),cycles) +T17_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T17_5...") + y=pheno_df_pla$T17_5 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T17_5_gblup_variances_all[[r]]<-var + T17_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T17_5_gblup_variances_all[[r]]<-list() + T17_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T17_5_gfblup_variances_all[[r]]<-var + T17_5_gfblup_prediction_all[[r]]<-pred + T17_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T17_5_gblup_variances_all,"T17_5_gblup_variances_all_3001_4000.rds") +saveRDS(T17_5_gblup_prediction_all,"T17_5_gblup_prediction_all_3001_4000.rds") +saveRDS(T17_5_gfblup_variances_all,"T17_5_gfblup_variances_all_3001_4000.rds") +saveRDS(T17_5_gfblup_prediction_all,"T17_5_gfblup_prediction_all_3001_4000.rds") +saveRDS(T17_5_gfblup_validate_all,"T17_5_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T17_5_4001_5000.R b/code/using_GO/pla/T17_5_4001_5000.R new 
file mode 100644
index 0000000000000000000000000000000000000000..fbd74007c60faeac17a82ceb910bbe8906df8185
--- /dev/null
+++ b/code/using_GO/pla/T17_5_4001_5000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[4001:5000]
+rGF_filtered<-rGF_filtered[4001:5000]
+
+###########################################################################################
+# Cross-validation setup. gblup_validate[[r]] is indexed by repeat and reshaped
+# with n_folds below, so both presumably must match the run that saved the file.
+cycles <- 10
+n_folds <- 8
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+
+# One slot per repeat; a slot stays an empty list() when its fit failed.
+T17_5_gblup_variances_all <- rep(list(list()), cycles)
+T17_5_gblup_prediction_all <- rep(list(list()), cycles)
+T17_5_gfblup_variances_all <- rep(list(list()), cycles)
+T17_5_gfblup_prediction_all <- rep(list(list()), cycles)
+T17_5_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Fit greml() on all records ($var) and again with validation folds ($pred).
+# A tryCatch() handler is a function, so assignments made inside it never reach
+# the calling scope; a failure is logged and reported through the return value,
+# which holds empty lists for both fits.
+fit_greml <- function(y, X, GRM, validate, label) {
+  tryCatch({
+    list(var = greml(y = y, X = X, GRM = GRM, ncores = 1),
+         pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1))
+  }, error = function(e) {
+    message(paste(label, "failed:", conditionMessage(e)))
+    list(var = list(), pred = list())
+  })
+}
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # Folds come from gblup_validate rather than being re-sampled here.
+  # NOTE(review): confirm that byrow=TRUE reproduces the saved fold layout.
+  validate <- matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T17_5...")
+  y <- pheno_df_pla$T17_5
+  # NOTE(review): presumably an intercept-only design (y ~ 1) is intended.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  fit <- fit_greml(y, X, list(G=G), validate, "GBLUP")
+  T17_5_gblup_variances_all[r] <- list(fit$var)
+  T17_5_gblup_prediction_all[r] <- list(fit$pred)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    # Two GRMs per model: entry k of GF_filtered and entry k of rGF_filtered.
+    fit <- fit_greml(y, X, c(GF_filtered[k],rGF_filtered[k]), validate, paste("GO#",k))
+    # x[k] <- list(v) keeps slot k even if v were NULL; x[[k]] <- NULL drops it.
+    gfblup_var[k] <- list(fit$var)
+    gfblup_pred[k] <- list(fit$pred)
+  }
+  T17_5_gfblup_variances_all[[r]] <- gfblup_var
+  T17_5_gfblup_prediction_all[[r]] <- gfblup_pred
+  T17_5_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T17_5_gblup_variances_all,"T17_5_gblup_variances_all_4001_5000.rds")
+saveRDS(T17_5_gblup_prediction_all,"T17_5_gblup_prediction_all_4001_5000.rds")
+saveRDS(T17_5_gfblup_variances_all,"T17_5_gfblup_variances_all_4001_5000.rds")
+saveRDS(T17_5_gfblup_prediction_all,"T17_5_gfblup_prediction_all_4001_5000.rds")
+saveRDS(T17_5_gfblup_validate_all,"T17_5_gfblup_validate_all_4001_5000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T17_5_5001_6000.R b/code/using_GO/pla/T17_5_5001_6000.R
new file mode 100644
index 0000000000000000000000000000000000000000..f6d90f2a225d50003366199e8b88ca2d3645c620
--- /dev/null
+++ b/code/using_GO/pla/T17_5_5001_6000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if
(!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[5001:6000]
+rGF_filtered<-rGF_filtered[5001:6000]
+
+###########################################################################################
+# Cross-validation setup. gblup_validate[[r]] is indexed by repeat and reshaped
+# with n_folds below, so both presumably must match the run that saved the file.
+cycles <- 10
+n_folds <- 8
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+
+# One slot per repeat; a slot stays an empty list() when its fit failed.
+T17_5_gblup_variances_all <- rep(list(list()), cycles)
+T17_5_gblup_prediction_all <- rep(list(list()), cycles)
+T17_5_gfblup_variances_all <- rep(list(list()), cycles)
+T17_5_gfblup_prediction_all <- rep(list(list()), cycles)
+T17_5_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Fit greml() on all records ($var) and again with validation folds ($pred).
+# A tryCatch() handler is a function, so assignments made inside it never reach
+# the calling scope; a failure is logged and reported through the return value,
+# which holds empty lists for both fits.
+fit_greml <- function(y, X, GRM, validate, label) {
+  tryCatch({
+    list(var = greml(y = y, X = X, GRM = GRM, ncores = 1),
+         pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1))
+  }, error = function(e) {
+    message(paste(label, "failed:", conditionMessage(e)))
+    list(var = list(), pred = list())
+  })
+}
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # Folds come from gblup_validate rather than being re-sampled here.
+  # NOTE(review): confirm that byrow=TRUE reproduces the saved fold layout.
+  validate <- matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T17_5...")
+  y <- pheno_df_pla$T17_5
+  # NOTE(review): presumably an intercept-only design (y ~ 1) is intended.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  fit <- fit_greml(y, X, list(G=G), validate, "GBLUP")
+  T17_5_gblup_variances_all[r] <- list(fit$var)
+  T17_5_gblup_prediction_all[r] <- list(fit$pred)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    # Two GRMs per model: entry k of GF_filtered and entry k of rGF_filtered.
+    fit <- fit_greml(y, X, c(GF_filtered[k],rGF_filtered[k]), validate, paste("GO#",k))
+    # x[k] <- list(v) keeps slot k even if v were NULL; x[[k]] <- NULL drops it.
+    gfblup_var[k] <- list(fit$var)
+    gfblup_pred[k] <- list(fit$pred)
+  }
+  T17_5_gfblup_variances_all[[r]] <- gfblup_var
+  T17_5_gfblup_prediction_all[[r]] <- gfblup_pred
+  T17_5_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T17_5_gblup_variances_all,"T17_5_gblup_variances_all_5001_6000.rds")
+saveRDS(T17_5_gblup_prediction_all,"T17_5_gblup_prediction_all_5001_6000.rds")
+saveRDS(T17_5_gfblup_variances_all,"T17_5_gfblup_variances_all_5001_6000.rds")
+saveRDS(T17_5_gfblup_prediction_all,"T17_5_gfblup_prediction_all_5001_6000.rds")
+saveRDS(T17_5_gfblup_validate_all,"T17_5_gfblup_validate_all_5001_6000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T17_5_6001_7297.R b/code/using_GO/pla/T17_5_6001_7297.R
new file mode 100644
index 0000000000000000000000000000000000000000..94e5775594620fe45eca955c94a2a30464a4b9ca
--- /dev/null
+++ b/code/using_GO/pla/T17_5_6001_7297.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[6001:7297]
+rGF_filtered<-rGF_filtered[6001:7297]
+
+###########################################################################################
+# Cross-validation setup. gblup_validate[[r]] is indexed by repeat and reshaped
+# with n_folds below, so both presumably must match the run that saved the file.
+cycles <- 10
+n_folds <- 8
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+
+# One slot per repeat; a slot stays an empty list() when its fit failed.
+T17_5_gblup_variances_all <- rep(list(list()), cycles)
+T17_5_gblup_prediction_all <- rep(list(list()), cycles)
+T17_5_gfblup_variances_all <- rep(list(list()), cycles)
+T17_5_gfblup_prediction_all <- rep(list(list()), cycles)
+T17_5_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Fit greml() on all records ($var) and again with validation folds ($pred).
+# A tryCatch() handler is a function, so assignments made inside it never reach
+# the calling scope; a failure is logged and reported through the return value,
+# which holds empty lists for both fits.
+fit_greml <- function(y, X, GRM, validate, label) {
+  tryCatch({
+    list(var = greml(y = y, X = X, GRM = GRM, ncores = 1),
+         pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1))
+  }, error = function(e) {
+    message(paste(label, "failed:", conditionMessage(e)))
+    list(var = list(), pred = list())
+  })
+}
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # Folds come from gblup_validate rather than being re-sampled here.
+  # NOTE(review): confirm that byrow=TRUE reproduces the saved fold layout.
+  validate <- matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T17_5...")
+  y <- pheno_df_pla$T17_5
+  # NOTE(review): presumably an intercept-only design (y ~ 1) is intended.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  fit <- fit_greml(y, X, list(G=G), validate, "GBLUP")
+  T17_5_gblup_variances_all[r] <- list(fit$var)
+  T17_5_gblup_prediction_all[r] <- list(fit$pred)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    # Two GRMs per model: entry k of GF_filtered and entry k of rGF_filtered.
+    fit <- fit_greml(y, X, c(GF_filtered[k],rGF_filtered[k]), validate, paste("GO#",k))
+    # x[k] <- list(v) keeps slot k even if v were NULL; x[[k]] <- NULL drops it.
+    gfblup_var[k] <- list(fit$var)
+    gfblup_pred[k] <- list(fit$pred)
+  }
+  T17_5_gfblup_variances_all[[r]] <- gfblup_var
+  T17_5_gfblup_prediction_all[[r]] <- gfblup_pred
+  T17_5_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T17_5_gblup_variances_all,"T17_5_gblup_variances_all_6001_7297.rds")
+saveRDS(T17_5_gblup_prediction_all,"T17_5_gblup_prediction_all_6001_7297.rds")
+saveRDS(T17_5_gfblup_variances_all,"T17_5_gfblup_variances_all_6001_7297.rds")
+saveRDS(T17_5_gfblup_prediction_all,"T17_5_gfblup_prediction_all_6001_7297.rds")
+saveRDS(T17_5_gfblup_validate_all,"T17_5_gfblup_validate_all_6001_7297.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T17_6_1001_2000.R b/code/using_GO/pla/T17_6_1001_2000.R
new file mode 100644
index 0000000000000000000000000000000000000000..6c041cdb79a3c1a9d364924f176d89bb5797e933
--- /dev/null
+++ b/code/using_GO/pla/T17_6_1001_2000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <-
function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[1001:2000]
+rGF_filtered<-rGF_filtered[1001:2000]
+
+###########################################################################################
+# Cross-validation setup. gblup_validate[[r]] is indexed by repeat and reshaped
+# with n_folds below, so both presumably must match the run that saved the file.
+cycles <- 10
+n_folds <- 8
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+
+# One slot per repeat; a slot stays an empty list() when its fit failed.
+T17_6_gblup_variances_all <- rep(list(list()), cycles)
+T17_6_gblup_prediction_all <- rep(list(list()), cycles)
+T17_6_gfblup_variances_all <- rep(list(list()), cycles)
+T17_6_gfblup_prediction_all <- rep(list(list()), cycles)
+T17_6_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Fit greml() on all records ($var) and again with validation folds ($pred).
+# A tryCatch() handler is a function, so assignments made inside it never reach
+# the calling scope; a failure is logged and reported through the return value,
+# which holds empty lists for both fits.
+fit_greml <- function(y, X, GRM, validate, label) {
+  tryCatch({
+    list(var = greml(y = y, X = X, GRM = GRM, ncores = 1),
+         pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1))
+  }, error = function(e) {
+    message(paste(label, "failed:", conditionMessage(e)))
+    list(var = list(), pred = list())
+  })
+}
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # Folds come from gblup_validate rather than being re-sampled here.
+  # NOTE(review): confirm that byrow=TRUE reproduces the saved fold layout.
+  validate <- matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T17_6...")
+  y <- pheno_df_pla$T17_6
+  # NOTE(review): presumably an intercept-only design (y ~ 1) is intended.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  fit <- fit_greml(y, X, list(G=G), validate, "GBLUP")
+  T17_6_gblup_variances_all[r] <- list(fit$var)
+  T17_6_gblup_prediction_all[r] <- list(fit$pred)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    # Two GRMs per model: entry k of GF_filtered and entry k of rGF_filtered.
+    fit <- fit_greml(y, X, c(GF_filtered[k],rGF_filtered[k]), validate, paste("GO#",k))
+    # x[k] <- list(v) keeps slot k even if v were NULL; x[[k]] <- NULL drops it.
+    gfblup_var[k] <- list(fit$var)
+    gfblup_pred[k] <- list(fit$pred)
+  }
+  T17_6_gfblup_variances_all[[r]] <- gfblup_var
+  T17_6_gfblup_prediction_all[[r]] <- gfblup_pred
+  T17_6_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T17_6_gblup_variances_all,"T17_6_gblup_variances_all_1001_2000.rds")
+saveRDS(T17_6_gblup_prediction_all,"T17_6_gblup_prediction_all_1001_2000.rds")
+saveRDS(T17_6_gfblup_variances_all,"T17_6_gfblup_variances_all_1001_2000.rds")
+saveRDS(T17_6_gfblup_prediction_all,"T17_6_gfblup_prediction_all_1001_2000.rds")
+saveRDS(T17_6_gfblup_validate_all,"T17_6_gfblup_validate_all_1001_2000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T17_6_1_1000.R b/code/using_GO/pla/T17_6_1_1000.R
new file mode 100644
index 0000000000000000000000000000000000000000..46860aea7ef3249e35b9676b64445c5c5d61395d
--- /dev/null
+++ b/code/using_GO/pla/T17_6_1_1000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[1:1000]
+rGF_filtered<-rGF_filtered[1:1000]
+
+###########################################################################################
+# Cross-validation setup. gblup_validate[[r]] is indexed by repeat and reshaped
+# with n_folds below, so both presumably must match the run that saved the file.
+cycles <- 10
+n_folds <- 8
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+
+# One slot per repeat; a slot stays an empty list() when its fit failed.
+T17_6_gblup_variances_all <- rep(list(list()), cycles)
+T17_6_gblup_prediction_all <- rep(list(list()), cycles)
+T17_6_gfblup_variances_all <- rep(list(list()), cycles)
+T17_6_gfblup_prediction_all <- rep(list(list()), cycles)
+T17_6_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Fit greml() on all records ($var) and again with validation folds ($pred).
+# A tryCatch() handler is a function, so assignments made inside it never reach
+# the calling scope; a failure is logged and reported through the return value,
+# which holds empty lists for both fits.
+fit_greml <- function(y, X, GRM, validate, label) {
+  tryCatch({
+    list(var = greml(y = y, X = X, GRM = GRM, ncores = 1),
+         pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1))
+  }, error = function(e) {
+    message(paste(label, "failed:", conditionMessage(e)))
+    list(var = list(), pred = list())
+  })
+}
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # Folds come from gblup_validate rather than being re-sampled here.
+  # NOTE(review): confirm that byrow=TRUE reproduces the saved fold layout.
+  validate <- matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T17_6...")
+  y <- pheno_df_pla$T17_6
+  # NOTE(review): presumably an intercept-only design (y ~ 1) is intended.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  fit <- fit_greml(y, X, list(G=G), validate, "GBLUP")
+  T17_6_gblup_variances_all[r] <- list(fit$var)
+  T17_6_gblup_prediction_all[r] <- list(fit$pred)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    # Two GRMs per model: entry k of GF_filtered and entry k of rGF_filtered.
+    fit <- fit_greml(y, X, c(GF_filtered[k],rGF_filtered[k]), validate, paste("GO#",k))
+    # x[k] <- list(v) keeps slot k even if v were NULL; x[[k]] <- NULL drops it.
+    gfblup_var[k] <- list(fit$var)
+    gfblup_pred[k] <- list(fit$pred)
+  }
+  T17_6_gfblup_variances_all[[r]] <- gfblup_var
+  T17_6_gfblup_prediction_all[[r]] <- gfblup_pred
+  T17_6_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T17_6_gblup_variances_all,"T17_6_gblup_variances_all_1_1000.rds")
+saveRDS(T17_6_gblup_prediction_all,"T17_6_gblup_prediction_all_1_1000.rds")
+saveRDS(T17_6_gfblup_variances_all,"T17_6_gfblup_variances_all_1_1000.rds")
+saveRDS(T17_6_gfblup_prediction_all,"T17_6_gfblup_prediction_all_1_1000.rds")
+saveRDS(T17_6_gfblup_validate_all,"T17_6_gfblup_validate_all_1_1000.rds")
+
+################################################################################################################################## diff --git a/code/using_GO/pla/T17_6_2001_3000.R b/code/using_GO/pla/T17_6_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..41e9f52b307f4d8e9bc9012f83aa10092cb1517a --- /dev/null +++ b/code/using_GO/pla/T17_6_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] 
+rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T17_6_gblup_variances_all=rep(list(list()),cycles) +T17_6_gblup_prediction_all=rep(list(list()),cycles) +T17_6_gfblup_variances_all=rep(list(list()),cycles) +T17_6_gfblup_prediction_all=rep(list(list()),cycles) +T17_6_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T17_6...") + y=pheno_df_pla$T17_6 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T17_6_gblup_variances_all[[r]]<-var + T17_6_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T17_6_gblup_variances_all[[r]]<-list() + T17_6_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred 
<-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T17_6_gfblup_variances_all[[r]]<-var + T17_6_gfblup_prediction_all[[r]]<-pred + T17_6_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T17_6_gblup_variances_all,"T17_6_gblup_variances_all_2001_3000.rds") +saveRDS(T17_6_gblup_prediction_all,"T17_6_gblup_prediction_all_2001_3000.rds") +saveRDS(T17_6_gfblup_variances_all,"T17_6_gfblup_variances_all_2001_3000.rds") +saveRDS(T17_6_gfblup_prediction_all,"T17_6_gfblup_prediction_all_2001_3000.rds") +saveRDS(T17_6_gfblup_validate_all,"T17_6_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T17_6_3001_4000.R b/code/using_GO/pla/T17_6_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..165d60b26cd46fc31d625c9837966aa47f5064c6 --- /dev/null +++ b/code/using_GO/pla/T17_6_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + 
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T17_6_gblup_variances_all=rep(list(list()),cycles) +T17_6_gblup_prediction_all=rep(list(list()),cycles) +T17_6_gfblup_variances_all=rep(list(list()),cycles) +T17_6_gfblup_prediction_all=rep(list(list()),cycles) +T17_6_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + 
message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T17_6...") + y=pheno_df_pla$T17_6 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T17_6_gblup_variances_all[[r]]<-var + T17_6_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T17_6_gblup_variances_all[[r]]<-list() + T17_6_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T17_6_gfblup_variances_all[[r]]<-var + T17_6_gfblup_prediction_all[[r]]<-pred + T17_6_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} 
+saveRDS(T17_6_gblup_variances_all,"T17_6_gblup_variances_all_3001_4000.rds") +saveRDS(T17_6_gblup_prediction_all,"T17_6_gblup_prediction_all_3001_4000.rds") +saveRDS(T17_6_gfblup_variances_all,"T17_6_gfblup_variances_all_3001_4000.rds") +saveRDS(T17_6_gfblup_prediction_all,"T17_6_gfblup_prediction_all_3001_4000.rds") +saveRDS(T17_6_gfblup_validate_all,"T17_6_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T17_6_4001_5000.R b/code/using_GO/pla/T17_6_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..abe1608c6fe4df3a6174227505d22fd9e4a73908 --- /dev/null +++ b/code/using_GO/pla/T17_6_4001_5000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") 
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with 
zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T17_6_gblup_variances_all=rep(list(list()),cycles) +T17_6_gblup_prediction_all=rep(list(list()),cycles) +T17_6_gfblup_variances_all=rep(list(list()),cycles) +T17_6_gfblup_prediction_all=rep(list(list()),cycles) +T17_6_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T17_6...") + y=pheno_df_pla$T17_6 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = 
list(G=G), validate = validate,ncores=1) + T17_6_gblup_variances_all[[r]]<-var + T17_6_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T17_6_gblup_variances_all[[r]]<-list() + T17_6_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T17_6_gfblup_variances_all[[r]]<-var + T17_6_gfblup_prediction_all[[r]]<-pred + T17_6_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T17_6_gblup_variances_all,"T17_6_gblup_variances_all_4001_5000.rds") +saveRDS(T17_6_gblup_prediction_all,"T17_6_gblup_prediction_all_4001_5000.rds") +saveRDS(T17_6_gfblup_variances_all,"T17_6_gfblup_variances_all_4001_5000.rds") +saveRDS(T17_6_gfblup_prediction_all,"T17_6_gfblup_prediction_all_4001_5000.rds") +saveRDS(T17_6_gfblup_validate_all,"T17_6_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T17_6_5001_6000.R b/code/using_GO/pla/T17_6_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..96c153099880fc37bb33b93ec736255f9f88ec8e --- /dev/null +++ b/code/using_GO/pla/T17_6_5001_6000.R @@ -0,0 +1,131 @@ 
+############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + 
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T17_6_gblup_variances_all=rep(list(list()),cycles) +T17_6_gblup_prediction_all=rep(list(list()),cycles) +T17_6_gfblup_variances_all=rep(list(list()),cycles) +T17_6_gfblup_prediction_all=rep(list(list()),cycles) +T17_6_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T17_6...") + y=pheno_df_pla$T17_6 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T17_6_gblup_variances_all[[r]]<-var + T17_6_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T17_6_gblup_variances_all[[r]]<-list() + T17_6_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T17_6_gfblup_variances_all[[r]]<-var + T17_6_gfblup_prediction_all[[r]]<-pred + T17_6_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T17_6_gblup_variances_all,"T17_6_gblup_variances_all_5001_6000.rds") +saveRDS(T17_6_gblup_prediction_all,"T17_6_gblup_prediction_all_5001_6000.rds") +saveRDS(T17_6_gfblup_variances_all,"T17_6_gfblup_variances_all_5001_6000.rds") +saveRDS(T17_6_gfblup_prediction_all,"T17_6_gfblup_prediction_all_5001_6000.rds") +saveRDS(T17_6_gfblup_validate_all,"T17_6_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T17_6_6001_7297.R b/code/using_GO/pla/T17_6_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..f21d37ba37977222da28f4dca4b171ff98b7befa --- /dev/null +++ b/code/using_GO/pla/T17_6_6001_7297.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# 
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
# 7297 excluding those GO terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those GO terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those GO terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those GO terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those GO terms with zero markers

# Filter ---------------------------------------------------------------------
# Keep only GO terms that have at least one marker, then take this job's slice
# (terms 6001..7297) of the feature GRMs (GF) and their complements (rGF).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[6001:7297]
rGF_filtered <- rGF_filtered[6001:7297]

# Cross-validation setup -----------------------------------------------------
cycles <- 10
# Validation folds are read from the file written by the plain GBLUP run
# instead of being re-sampled here.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T17_6_gblup_variances_all <- rep(list(list()), cycles)
T17_6_gblup_prediction_all <- rep(list(list()), cycles)
T17_6_gfblup_variances_all <- rep(list(list()), cycles)
T17_6_gfblup_prediction_all <- rep(list(list()), cycles)
T17_6_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design do not depend on the cycle, so they are
# built once.
y <- pheno_df_pla$T17_6
# NOTE(review): `y ~ 1*y` puts the response on the right-hand side; this looks
# like it was meant to be an intercept-only model (`y ~ 1`) -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The time-seeded `folds_index` sampling and the empty per-fold loop of the
  # original were dead code (the folds come from `gblup_validate`) and have
  # been dropped.
  # NOTE(review): the commented-out original filled `validate` column by
  # column; here it is rebuilt with byrow = TRUE -- confirm that this matches
  # the layout stored in gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T17_6...")

  # GBLUP --------------------------------------------------------------------
  # The handler returns NULL rather than assigning: an assignment inside
  # `function(e)` only changes a handler-local copy and never reaches the
  # result lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T17_6_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T17_6_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # GFBLUP -------------------------------------------------------------------
  # One two-component model (feature GRM + remainder GRM) per GO term.
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    gfblup_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message(
          "GFBLUP failed for ", names(GF_filtered)[k], " in cycle ", r, ": ",
          conditionMessage(e)
        )
        NULL
      }
    )
    # Both entries are stored only when both fits succeed, so a failure leaves
    # both as empty lists (what the original handler tried to do).
    if (!is.null(gfblup_fit)) {
      gfblup_var[[k]] <- gfblup_fit[["var"]]
      gfblup_pred[[k]] <- gfblup_fit[["pred"]]
    }
  }
  T17_6_gfblup_variances_all[[r]] <- gfblup_var
  T17_6_gfblup_prediction_all[[r]] <- gfblup_pred
  T17_6_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T17_6_gblup_variances_all, "T17_6_gblup_variances_all_6001_7297.rds")
saveRDS(T17_6_gblup_prediction_all, "T17_6_gblup_prediction_all_6001_7297.rds")
saveRDS(T17_6_gfblup_variances_all, "T17_6_gfblup_variances_all_6001_7297.rds")
saveRDS(T17_6_gfblup_prediction_all, "T17_6_gfblup_prediction_all_6001_7297.rds")
+saveRDS(T17_6_gfblup_validate_all,"T17_6_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T17_7_1001_2000.R b/code/using_GO/pla/T17_7_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..04828866e107622a15058ae58189ffcd9b9c9d2a --- /dev/null +++ b/code/using_GO/pla/T17_7_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# Genotype / phenotype inputs -------------------------------------------------
# readRDS() results are bound explicitly; load() restores whatever objects
# were saved in each .Rdata file into the global environment.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO-term priors ----------------------------------------------------------------
# 7432 entries: every GO term
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# 7297 entries: GO terms with zero markers are excluded
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
+######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T17_7_gblup_variances_all=rep(list(list()),cycles) +T17_7_gblup_prediction_all=rep(list(list()),cycles) +T17_7_gfblup_variances_all=rep(list(list()),cycles) +T17_7_gfblup_prediction_all=rep(list(list()),cycles) +T17_7_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T17_7...") + y=pheno_df_pla$T17_7 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T17_7_gblup_variances_all[[r]]<-var + T17_7_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
T17_7_gblup_variances_all[[r]]<-list() + T17_7_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T17_7_gfblup_variances_all[[r]]<-var + T17_7_gfblup_prediction_all[[r]]<-pred + T17_7_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T17_7_gblup_variances_all,"T17_7_gblup_variances_all_1001_2000.rds") +saveRDS(T17_7_gblup_prediction_all,"T17_7_gblup_prediction_all_1001_2000.rds") +saveRDS(T17_7_gfblup_variances_all,"T17_7_gfblup_variances_all_1001_2000.rds") +saveRDS(T17_7_gfblup_prediction_all,"T17_7_gfblup_prediction_all_1001_2000.rds") +saveRDS(T17_7_gfblup_validate_all,"T17_7_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T17_7_1_1000.R b/code/using_GO/pla/T17_7_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..bfcf41e7d4850edee24e99251b98005f49f73896 --- /dev/null +++ b/code/using_GO/pla/T17_7_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# 
Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
# GO-term priors ----------------------------------------------------------------
# 7432 entries: every GO term
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# 7297 entries: GO terms with zero markers are excluded
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# Filter ------------------------------------------------------------------------
# Restrict to GO terms with at least one marker, then keep this job's slice
# (terms 1..1000) of the feature GRMs and their complements.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]

# Cross-validation setup ----------------------------------------------------------
cycles <- 10
# Validation folds are read from the file written by the plain GBLUP run.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) result slot per cycle.
T17_7_gblup_variances_all <- rep(list(list()), cycles)
+T17_7_gblup_prediction_all=rep(list(list()),cycles) +T17_7_gfblup_variances_all=rep(list(list()),cycles) +T17_7_gfblup_prediction_all=rep(list(list()),cycles) +T17_7_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T17_7...") + y=pheno_df_pla$T17_7 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T17_7_gblup_variances_all[[r]]<-var + T17_7_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T17_7_gblup_variances_all[[r]]<-list() + T17_7_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends 
+ } + T17_7_gfblup_variances_all[[r]]<-var + T17_7_gfblup_prediction_all[[r]]<-pred + T17_7_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T17_7_gblup_variances_all,"T17_7_gblup_variances_all_1_1000.rds") +saveRDS(T17_7_gblup_prediction_all,"T17_7_gblup_prediction_all_1_1000.rds") +saveRDS(T17_7_gfblup_variances_all,"T17_7_gfblup_variances_all_1_1000.rds") +saveRDS(T17_7_gfblup_prediction_all,"T17_7_gfblup_prediction_all_1_1000.rds") +saveRDS(T17_7_gfblup_validate_all,"T17_7_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T17_7_2001_3000.R b/code/using_GO/pla/T17_7_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..732e7ffe947d64dbc47ad42cf177bff165e3667b --- /dev/null +++ b/code/using_GO/pla/T17_7_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') 
# Load data -----------------------------------------------------------------------
message("Loading data...")

# Genotype / phenotype inputs. readRDS() results are bound explicitly; load()
# restores whatever objects were saved in each .Rdata file.
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO-term priors.
# 7432 entries: every GO term
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# 7297 entries: GO terms with zero markers are excluded
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T17_7_gblup_variances_all=rep(list(list()),cycles) +T17_7_gblup_prediction_all=rep(list(list()),cycles) +T17_7_gfblup_variances_all=rep(list(list()),cycles) +T17_7_gfblup_prediction_all=rep(list(list()),cycles) +T17_7_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T17_7...") + y=pheno_df_pla$T17_7 + fm <- y ~ 1*y + 
X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T17_7_gblup_variances_all[[r]]<-var + T17_7_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T17_7_gblup_variances_all[[r]]<-list() + T17_7_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T17_7_gfblup_variances_all[[r]]<-var + T17_7_gfblup_prediction_all[[r]]<-pred + T17_7_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T17_7_gblup_variances_all,"T17_7_gblup_variances_all_2001_3000.rds") +saveRDS(T17_7_gblup_prediction_all,"T17_7_gblup_prediction_all_2001_3000.rds") +saveRDS(T17_7_gfblup_variances_all,"T17_7_gfblup_variances_all_2001_3000.rds") +saveRDS(T17_7_gfblup_prediction_all,"T17_7_gfblup_prediction_all_2001_3000.rds") +saveRDS(T17_7_gfblup_validate_all,"T17_7_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T17_7_3001_4000.R 
b/code/using_GO/pla/T17_7_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..990353283d71679dacc28e8d4c0b91df086fafb8 --- /dev/null +++ b/code/using_GO/pla/T17_7_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
# Phenotype inputs: load() restores whatever objects were saved in each file.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO-term priors ----------------------------------------------------------------
# 7432 entries: every GO term
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# 7297 entries: GO terms with zero markers are excluded
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# Filter ------------------------------------------------------------------------
# Restrict to GO terms with at least one marker, then keep this job's slice
# (terms 3001..4000) of the feature GRMs and their complements.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]
+###########################################################################################
+# Cross-validation setup: `cycles` repeats; fold assignments are read from the file written
+# by the GBLUP run instead of being drawn here.
+cycles <- 10
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# One slot per cycle; a slot keeps its empty list() when the fit for that cycle fails.
+T17_7_gblup_variances_all <- rep(list(list()), cycles)
+T17_7_gblup_prediction_all <- rep(list(list()), cycles)
+T17_7_gfblup_variances_all <- rep(list(list()), cycles)
+T17_7_gfblup_prediction_all <- rep(list(list()), cycles)
+T17_7_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and design matrix are identical in every cycle, so they are built once.
+# NOTE(review): the response also appears on the right-hand side of `fm`; confirm that X
+# is the intended design matrix.
+y <- pheno_df_pla$T17_7
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+n <- length(y)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # NOTE(review): if gblup_validate[[r]] holds a matrix, unlist() flattens it column by
+  # column while byrow = TRUE refills row by row, and round() assumes the rows split evenly
+  # into n_folds; confirm this rebuilds the fold layout saved by the GBLUP run.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  message("T17_7...")
+  #~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~
+  # Results are stored only when both fits succeed; a failure is logged, not hidden.
+  tryCatch({
+    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
+    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    T17_7_gblup_variances_all[[r]] <- gblup_var
+    T17_7_gblup_prediction_all[[r]] <- gblup_pred
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  })
+  #~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~
+  var <- rep(list(list()), length(GF_filtered))
+  pred <- rep(list(list()), length(GF_filtered))
+  names(var) <- names(GF_filtered)
+  names(pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    # Two relationship matrices per model: entry k of GF_filtered and of rGF_filtered.
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    tryCatch({
+      var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
+      pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  T17_7_gfblup_variances_all[[r]] <- var
+  T17_7_gfblup_prediction_all[[r]] <- pred
+  T17_7_gfblup_validate_all[[r]] <- list(validate)
+}
+saveRDS(T17_7_gblup_variances_all, file = "T17_7_gblup_variances_all_3001_4000.rds")
+saveRDS(T17_7_gblup_prediction_all, file = "T17_7_gblup_prediction_all_3001_4000.rds")
+saveRDS(T17_7_gfblup_variances_all, file = "T17_7_gfblup_variances_all_3001_4000.rds")
+saveRDS(T17_7_gfblup_prediction_all, file = "T17_7_gfblup_prediction_all_3001_4000.rds")
+saveRDS(T17_7_gfblup_validate_all, file = "T17_7_gfblup_validate_all_3001_4000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T17_7_4001_5000.R b/code/using_GO/pla/T17_7_4001_5000.R
new file mode 100644
index 0000000000000000000000000000000000000000..cf0bd608c555461f8f4894e593fc38e84ce8c15a
--- /dev/null
+++ b/code/using_GO/pla/T17_7_4001_5000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+# Remove leading and trailing whitespace from x.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negation of %in%.
+`%not_in%` <- purrr::negate(`%in%`)
+# Check whether mypkg is listed in the first column of installed.packages().
+is.installed <- function(mypkg) {
+  mypkg %in% installed.packages()[, 1]
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+# Disabled one-time setup: install.packages() for backports and devtools, then
+# options(devtools.install.args=" --no-multiargs") and devtools::install_github("psoerensen/qgg").
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+library("qgg")
+library("dplyr")
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
+go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
+# Original size note for the six objects below: 7297, excluding those go terms with zero markers.
+setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
+rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
+nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
+nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
+GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
+rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
+########################################################################################
+# filter: keep the GO terms with a marker count above zero, then take entries 4001:5000
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)][4001:5000]
+rGF_filtered <- rGF[names(go_all_markers_filtered)][4001:5000]
+
+###########################################################################################
+# Cross-validation setup: `cycles` repeats; fold assignments are read from the file written
+# by the GBLUP run instead of being drawn here.
+cycles <- 10
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# One slot per cycle; a slot keeps its empty list() when the fit for that cycle fails.
+T17_7_gblup_variances_all <- rep(list(list()), cycles)
+T17_7_gblup_prediction_all <- rep(list(list()), cycles)
+T17_7_gfblup_variances_all <- rep(list(list()), cycles)
+T17_7_gfblup_prediction_all <- rep(list(list()), cycles)
+T17_7_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and design matrix are identical in every cycle, so they are built once.
+# NOTE(review): the response also appears on the right-hand side of `fm`; confirm that X
+# is the intended design matrix.
+y <- pheno_df_pla$T17_7
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+n <- length(y)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # NOTE(review): if gblup_validate[[r]] holds a matrix, unlist() flattens it column by
+  # column while byrow = TRUE refills row by row, and round() assumes the rows split evenly
+  # into n_folds; confirm this rebuilds the fold layout saved by the GBLUP run.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  message("T17_7...")
+  #~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~
+  # Results are stored only when both fits succeed; a failure is logged, not hidden.
+  tryCatch({
+    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
+    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    T17_7_gblup_variances_all[[r]] <- gblup_var
+    T17_7_gblup_prediction_all[[r]] <- gblup_pred
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  })
+  #~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~
+  var <- rep(list(list()), length(GF_filtered))
+  pred <- rep(list(list()), length(GF_filtered))
+  names(var) <- names(GF_filtered)
+  names(pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    # Two relationship matrices per model: entry k of GF_filtered and of rGF_filtered.
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    tryCatch({
+      var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
+      pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  T17_7_gfblup_variances_all[[r]] <- var
+  T17_7_gfblup_prediction_all[[r]] <- pred
+  T17_7_gfblup_validate_all[[r]] <- list(validate)
+}
+saveRDS(T17_7_gblup_variances_all, file = "T17_7_gblup_variances_all_4001_5000.rds")
+saveRDS(T17_7_gblup_prediction_all, file = "T17_7_gblup_prediction_all_4001_5000.rds")
+# Write the remaining per-cycle result lists for this slice to the working directory.
+saveRDS(T17_7_gfblup_variances_all, file = "T17_7_gfblup_variances_all_4001_5000.rds")
+saveRDS(T17_7_gfblup_prediction_all, file = "T17_7_gfblup_prediction_all_4001_5000.rds")
+saveRDS(T17_7_gfblup_validate_all, file = "T17_7_gfblup_validate_all_4001_5000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T17_7_5001_6000.R b/code/using_GO/pla/T17_7_5001_6000.R
new file mode 100644
index 0000000000000000000000000000000000000000..7ea304914239a39649a6a24bfe2c860f33a046c3
--- /dev/null
+++ b/code/using_GO/pla/T17_7_5001_6000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+# Remove leading and trailing whitespace from x.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negation of %in%.
+`%not_in%` <- purrr::negate(`%in%`)
+# Check whether mypkg is listed in the first column of installed.packages().
+is.installed <- function(mypkg) {
+  mypkg %in% installed.packages()[, 1]
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+# Disabled one-time setup: install.packages() for backports and devtools, then
+# options(devtools.install.args=" --no-multiargs") and devtools::install_github("psoerensen/qgg").
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+library("qgg")
+library("dplyr")
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
+go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+# Original size note for the seven objects below: 7297, excluding those go terms with zero markers.
+markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
+setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
+rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
+nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
+nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
+GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
+rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
+########################################################################################
+# filter: keep the GO terms with a marker count above zero, then take entries 5001:6000
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)][5001:6000]
+rGF_filtered <- rGF[names(go_all_markers_filtered)][5001:6000]
+
+###########################################################################################
+# Cross-validation setup: `cycles` repeats; fold assignments are read from the file written
+# by the GBLUP run instead of being drawn here.
+cycles <- 10
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# One slot per cycle; a slot keeps its empty list() when the fit for that cycle fails.
+T17_7_gblup_variances_all <- rep(list(list()), cycles)
+T17_7_gblup_prediction_all <- rep(list(list()), cycles)
+T17_7_gfblup_variances_all <- rep(list(list()), cycles)
+T17_7_gfblup_prediction_all <- rep(list(list()), cycles)
+T17_7_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and design matrix are identical in every cycle, so they are built once.
+# NOTE(review): the response also appears on the right-hand side of `fm`; confirm that X
+# is the intended design matrix.
+y <- pheno_df_pla$T17_7
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+n <- length(y)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # NOTE(review): if gblup_validate[[r]] holds a matrix, unlist() flattens it column by
+  # column while byrow = TRUE refills row by row, and round() assumes the rows split evenly
+  # into n_folds; confirm this rebuilds the fold layout saved by the GBLUP run.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  message("T17_7...")
+  #~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~
+  # Results are stored only when both fits succeed; a failure is logged, not hidden.
+  tryCatch({
+    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
+    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    T17_7_gblup_variances_all[[r]] <- gblup_var
+    T17_7_gblup_prediction_all[[r]] <- gblup_pred
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  })
+  #~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~
+  var <- rep(list(list()), length(GF_filtered))
+  pred <- rep(list(list()), length(GF_filtered))
+  names(var) <- names(GF_filtered)
+  names(pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    # Two relationship matrices per model: entry k of GF_filtered and of rGF_filtered.
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    tryCatch({
+      var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
+      pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  T17_7_gfblup_variances_all[[r]] <- var
+  T17_7_gfblup_prediction_all[[r]] <- pred
+  T17_7_gfblup_validate_all[[r]] <- list(validate)
+}
+saveRDS(T17_7_gblup_variances_all, file = "T17_7_gblup_variances_all_5001_6000.rds")
+saveRDS(T17_7_gblup_prediction_all, file = "T17_7_gblup_prediction_all_5001_6000.rds")
+saveRDS(T17_7_gfblup_variances_all, file = "T17_7_gfblup_variances_all_5001_6000.rds")
+saveRDS(T17_7_gfblup_prediction_all, file = "T17_7_gfblup_prediction_all_5001_6000.rds")
+saveRDS(T17_7_gfblup_validate_all, file = "T17_7_gfblup_validate_all_5001_6000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T17_7_6001_7297.R b/code/using_GO/pla/T17_7_6001_7297.R
new file mode 100644
index 0000000000000000000000000000000000000000..aeda6a2868247b58877f3c6721fd7fefd771a732
--- /dev/null
+++ b/code/using_GO/pla/T17_7_6001_7297.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+# Remove leading and trailing whitespace from x.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negation of %in%.
+`%not_in%` <- purrr::negate(`%in%`)
+# Check whether mypkg is listed in the first column of installed.packages().
+is.installed <- function(mypkg) {
+  mypkg %in% installed.packages()[, 1]
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+# Disabled one-time setup: install.packages() for backports and devtools, then
+# options(devtools.install.args=" --no-multiargs") and devtools::install_github("psoerensen/qgg").
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+library("qgg")
+library("dplyr")
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
+go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+# Original size note for the seven objects below: 7297, excluding those go terms with zero markers.
+markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
+setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
+rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
+nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
+nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
+GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
+rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
+########################################################################################
+# filter: keep the GO terms with a marker count above zero, then take entries 6001:7297
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)][6001:7297]
+rGF_filtered <- rGF[names(go_all_markers_filtered)][6001:7297]
+
+###########################################################################################
+# Cross-validation setup: `cycles` repeats; fold assignments are read from the file written
+# by the GBLUP run instead of being drawn here.
+cycles <- 10
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# One slot per cycle; a slot keeps its empty list() when the fit for that cycle fails.
+T17_7_gblup_variances_all <- rep(list(list()), cycles)
+T17_7_gblup_prediction_all <- rep(list(list()), cycles)
+T17_7_gfblup_variances_all <- rep(list(list()), cycles)
+T17_7_gfblup_prediction_all <- rep(list(list()), cycles)
+T17_7_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and design matrix are identical in every cycle, so they are built once.
+# NOTE(review): the response also appears on the right-hand side of `fm`; confirm that X
+# is the intended design matrix.
+y <- pheno_df_pla$T17_7
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+n <- length(y)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # NOTE(review): if gblup_validate[[r]] holds a matrix, unlist() flattens it column by
+  # column while byrow = TRUE refills row by row, and round() assumes the rows split evenly
+  # into n_folds; confirm this rebuilds the fold layout saved by the GBLUP run.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  message("T17_7...")
+  #~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~
+  # Results are stored only when both fits succeed; a failure is logged, not hidden.
+  tryCatch({
+    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
+    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    T17_7_gblup_variances_all[[r]] <- gblup_var
+    T17_7_gblup_prediction_all[[r]] <- gblup_pred
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  })
+  #~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~
+  var <- rep(list(list()), length(GF_filtered))
+  pred <- rep(list(list()), length(GF_filtered))
+  names(var) <- names(GF_filtered)
+  names(pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    # Two relationship matrices per model: entry k of GF_filtered and of rGF_filtered.
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    tryCatch({
+      var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
+      pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  T17_7_gfblup_variances_all[[r]] <- var
+  T17_7_gfblup_prediction_all[[r]] <- pred
+  T17_7_gfblup_validate_all[[r]] <- list(validate)
+}
+saveRDS(T17_7_gblup_variances_all, file = "T17_7_gblup_variances_all_6001_7297.rds")
+saveRDS(T17_7_gblup_prediction_all, file = "T17_7_gblup_prediction_all_6001_7297.rds")
+saveRDS(T17_7_gfblup_variances_all, file = "T17_7_gfblup_variances_all_6001_7297.rds")
+saveRDS(T17_7_gfblup_prediction_all, file = "T17_7_gfblup_prediction_all_6001_7297.rds")
+saveRDS(T17_7_gfblup_validate_all, file = "T17_7_gfblup_validate_all_6001_7297.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T17_8_1001_2000.R b/code/using_GO/pla/T17_8_1001_2000.R
new file mode 100644
index 0000000000000000000000000000000000000000..4484afb9d2c713b861db08e6c9afd89da8cfb03a
--- /dev/null
+++ b/code/using_GO/pla/T17_8_1001_2000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+# Remove leading and trailing whitespace from x.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negation of %in%.
+`%not_in%` <- purrr::negate(`%in%`)
+# Check whether mypkg is listed in the first column of installed.packages().
+is.installed <- function(mypkg) {
+  mypkg %in% installed.packages()[, 1]
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+# Disabled one-time setup: install.packages() for backports and devtools, then
+# options(devtools.install.args=" --no-multiargs") and devtools::install_github("psoerensen/qgg").
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+library("qgg")
+library("dplyr")
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
+go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+# Original size note for the seven objects below: 7297, excluding those go terms with zero markers.
+markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
+setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
+rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
+nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
+nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
+GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
+rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
+########################################################################################
+# filter: keep the GO terms with a marker count above zero, then take entries 1001:2000
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)][1001:2000]
+rGF_filtered <- rGF[names(go_all_markers_filtered)][1001:2000]
+
+###########################################################################################
+# Cross-validation setup: `cycles` repeats; fold assignments are read from the file written
+# by the GBLUP run instead of being drawn here.
+cycles <- 10
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# One slot per cycle; a slot keeps its empty list() when the fit for that cycle fails.
+T17_8_gblup_variances_all <- rep(list(list()), cycles)
+T17_8_gblup_prediction_all <- rep(list(list()), cycles)
+T17_8_gfblup_variances_all <- rep(list(list()), cycles)
+T17_8_gfblup_prediction_all <- rep(list(list()), cycles)
+T17_8_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and design matrix are identical in every cycle, so they are built once.
+# NOTE(review): the response also appears on the right-hand side of `fm`; confirm that X
+# is the intended design matrix.
+y <- pheno_df_pla$T17_8
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+n <- length(y)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # NOTE(review): if gblup_validate[[r]] holds a matrix, unlist() flattens it column by
+  # column while byrow = TRUE refills row by row, and round() assumes the rows split evenly
+  # into n_folds; confirm this rebuilds the fold layout saved by the GBLUP run.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  message("T17_8...")
+  #~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~
+  # Results are stored only when both fits succeed; a failure is logged, not hidden.
+  tryCatch({
+    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
+    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    T17_8_gblup_variances_all[[r]] <- gblup_var
+    T17_8_gblup_prediction_all[[r]] <- gblup_pred
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  })
+  #~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~
+  var <- rep(list(list()), length(GF_filtered))
+  pred <- rep(list(list()), length(GF_filtered))
+  names(var) <- names(GF_filtered)
+  names(pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    # Two relationship matrices per model: entry k of GF_filtered and of rGF_filtered.
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    tryCatch({
+      var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
+      pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  T17_8_gfblup_variances_all[[r]] <- var
+  T17_8_gfblup_prediction_all[[r]] <- pred
+  T17_8_gfblup_validate_all[[r]] <- list(validate)
+}
+saveRDS(T17_8_gblup_variances_all, file = "T17_8_gblup_variances_all_1001_2000.rds")
+saveRDS(T17_8_gblup_prediction_all, file = "T17_8_gblup_prediction_all_1001_2000.rds")
+# Write the remaining per-cycle result lists for this slice to the working directory.
+saveRDS(T17_8_gfblup_variances_all, file = "T17_8_gfblup_variances_all_1001_2000.rds")
+saveRDS(T17_8_gfblup_prediction_all, file = "T17_8_gfblup_prediction_all_1001_2000.rds")
+saveRDS(T17_8_gfblup_validate_all, file = "T17_8_gfblup_validate_all_1001_2000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T17_8_1_1000.R b/code/using_GO/pla/T17_8_1_1000.R
new file mode 100644
index 0000000000000000000000000000000000000000..17ff57712fdfa37eb60034b70d01e4a4ba4a74f4
--- /dev/null
+++ b/code/using_GO/pla/T17_8_1_1000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+# Remove leading and trailing whitespace from x.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negation of %in%.
+`%not_in%` <- purrr::negate(`%in%`)
+# Check whether mypkg is listed in the first column of installed.packages().
+is.installed <- function(mypkg) {
+  mypkg %in% installed.packages()[, 1]
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+# Disabled one-time setup: install.packages() for backports and devtools, then
+# options(devtools.install.args=" --no-multiargs") and devtools::install_github("psoerensen/qgg").
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+library("qgg")
+library("dplyr")
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+# Objects read from the priors/data directory (readRDS for .rds, load for .Rdata).
+W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+# Objects read from the priors/go directory; the trailing numbers are the original size notes.
+go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
+go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+# Original size note for the seven objects below: 7297, excluding those go terms with zero markers.
+markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
+setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
+rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
+nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
+nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
+GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
+rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
+########################################################################################
+# filter
+# Number of entries of go_all_genes_number whose marker count is above zero.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
# Keep only GO terms with at least one marker, then take this script's slice.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]

###########################################################################################
cycles <- 10
# Fold assignments are read from the GBLUP run instead of being re-sampled here.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for that cycle fails.
T17_8_gblup_variances_all <- rep(list(list()), cycles)
T17_8_gblup_prediction_all <- rep(list(list()), cycles)
T17_8_gfblup_variances_all <- rep(list(list()), cycles)
T17_8_gfblup_prediction_all <- rep(list(list()), cycles)
T17_8_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T17_8
# NOTE(review): formula kept verbatim; it presumably reduces to an
# intercept-only design (`y ~ 1`) -- confirm before simplifying.
fm <- y ~ 1*y
X <- model.matrix(fm)
# Placeholder stored whenever a model fit fails.
failed_fit <- list(var = list(), pred = list())

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The unused fold sampling (time-based seed, folds_index, empty loop) was
  # dropped: the folds actually used are the stored ones for cycle r.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla) / n_folds),
                     byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T17_8...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The handler returns the fallback value: assignments made inside a handler
  # function only touch its local scope and never reach the result lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      failed_fit
    }
  )
  T17_8_gblup_variances_all[r] <- list(gblup_fit[["var"]])
  T17_8_gblup_prediction_all[r] <- list(gblup_fit[["pred"]])
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Feature GRM of GO term k together with its complementary (rest) GRM.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # Both fits are reset together on failure, as the original handler intended.
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        failed_fit
      }
    )
    # `[<-` with a wrapped value keeps slot k even if a fit were NULL.
    gf_var[k] <- list(gf_fit[["var"]])
    gf_pred[k] <- list(gf_fit[["pred"]])
  }
  T17_8_gfblup_variances_all[[r]] <- gf_var
  T17_8_gfblup_prediction_all[[r]] <- gf_pred
  T17_8_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T17_8_gblup_variances_all, "T17_8_gblup_variances_all_1_1000.rds")
saveRDS(T17_8_gblup_prediction_all, "T17_8_gblup_prediction_all_1_1000.rds")
saveRDS(T17_8_gfblup_variances_all, "T17_8_gfblup_variances_all_1_1000.rds")
saveRDS(T17_8_gfblup_prediction_all, "T17_8_gfblup_prediction_all_1_1000.rds")
saveRDS(T17_8_gfblup_validate_all, "T17_8_gfblup_validate_all_1_1000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T17_8_2001_3000.R b/code/using_GO/pla/T17_8_2001_3000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..d35a28baab8302c4617a261349a6d40e6b258a78
# --- /dev/null
# +++ b/code/using_GO/pla/T17_8_2001_3000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Relative output file names used by saveRDS() resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under the names they were saved with.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
# Remaining GO-term inputs.
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297, excluding GO terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297, excluding GO terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297, excluding GO terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297, excluding GO terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297, excluding GO terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297, excluding GO terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297, excluding GO terms with zero markers
########################################################################################
# Filter: keep GO terms with at least one marker, then take this script's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[2001:3000]
rGF_filtered <- rGF_filtered[2001:3000]

###########################################################################################
cycles <- 10
# Stored fold assignments from the GBLUP run (one entry is read per cycle).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Per-cycle result containers, pre-filled with empty lists.
T17_8_gblup_variances_all <- rep(list(list()), cycles)
T17_8_gblup_prediction_all <- rep(list(list()), cycles)
T17_8_gfblup_variances_all <- rep(list(list()), cycles)
T17_8_gfblup_prediction_all <- rep(list(list()), cycles)
T17_8_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T17_8
# NOTE(review): formula kept verbatim; it presumably reduces to an
# intercept-only design (`y ~ 1`) -- confirm before simplifying.
fm <- y ~ 1*y
X <- model.matrix(fm)
# Placeholder stored whenever a model fit fails.
failed_fit <- list(var = list(), pred = list())

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The unused fold sampling (time-based seed, folds_index, empty loop) was
  # dropped: the folds actually used are the stored ones for cycle r.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla) / n_folds),
                     byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T17_8...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The handler returns the fallback value: assignments made inside a handler
  # function only touch its local scope and never reach the result lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      failed_fit
    }
  )
  T17_8_gblup_variances_all[r] <- list(gblup_fit[["var"]])
  T17_8_gblup_prediction_all[r] <- list(gblup_fit[["pred"]])
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Feature GRM of GO term k together with its complementary (rest) GRM.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # Both fits are reset together on failure, as the original handler intended.
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        failed_fit
      }
    )
    # `[<-` with a wrapped value keeps slot k even if a fit were NULL.
    gf_var[k] <- list(gf_fit[["var"]])
    gf_pred[k] <- list(gf_fit[["pred"]])
  }
  T17_8_gfblup_variances_all[[r]] <- gf_var
  T17_8_gfblup_prediction_all[[r]] <- gf_pred
  T17_8_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T17_8_gblup_variances_all, "T17_8_gblup_variances_all_2001_3000.rds")
saveRDS(T17_8_gblup_prediction_all, "T17_8_gblup_prediction_all_2001_3000.rds")
saveRDS(T17_8_gfblup_variances_all, "T17_8_gfblup_variances_all_2001_3000.rds")
saveRDS(T17_8_gfblup_prediction_all, "T17_8_gfblup_prediction_all_2001_3000.rds")
saveRDS(T17_8_gfblup_validate_all, "T17_8_gfblup_validate_all_2001_3000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T17_8_3001_4000.R b/code/using_GO/pla/T17_8_3001_4000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..97ac630d1164fc90c5db0bbe321af69606c87edf
# --- /dev/null
# +++ b/code/using_GO/pla/T17_8_3001_4000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Relative output file names used by saveRDS() resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under the names they were saved with.
# NOTE(review): `G` and `pheno_df_pla`, used below, presumably come from these .Rdata files -- confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297, excluding GO terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297, excluding GO terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297, excluding GO terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297, excluding GO terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297, excluding GO terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297, excluding GO terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297, excluding GO terms with zero markers
########################################################################################
# Filter: keep GO terms with at least one marker, then take this script's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]

###########################################################################################
cycles <- 10
# Fold assignments are read from the GBLUP run instead of being re-sampled here.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for that cycle fails.
T17_8_gblup_variances_all <- rep(list(list()), cycles)
T17_8_gblup_prediction_all <- rep(list(list()), cycles)
T17_8_gfblup_variances_all <- rep(list(list()), cycles)
T17_8_gfblup_prediction_all <- rep(list(list()), cycles)
T17_8_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T17_8
# NOTE(review): formula kept verbatim; it presumably reduces to an
# intercept-only design (`y ~ 1`) -- confirm before simplifying.
fm <- y ~ 1*y
X <- model.matrix(fm)
# Placeholder stored whenever a model fit fails.
failed_fit <- list(var = list(), pred = list())

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The unused fold sampling (time-based seed, folds_index, empty loop) was
  # dropped: the folds actually used are the stored ones for cycle r.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla) / n_folds),
                     byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T17_8...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The handler returns the fallback value: assignments made inside a handler
  # function only touch its local scope and never reach the result lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      failed_fit
    }
  )
  T17_8_gblup_variances_all[r] <- list(gblup_fit[["var"]])
  T17_8_gblup_prediction_all[r] <- list(gblup_fit[["pred"]])
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Feature GRM of GO term k together with its complementary (rest) GRM.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # Both fits are reset together on failure, as the original handler intended.
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        failed_fit
      }
    )
    # `[<-` with a wrapped value keeps slot k even if a fit were NULL.
    gf_var[k] <- list(gf_fit[["var"]])
    gf_pred[k] <- list(gf_fit[["pred"]])
  }
  T17_8_gfblup_variances_all[[r]] <- gf_var
  T17_8_gfblup_prediction_all[[r]] <- gf_pred
  T17_8_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T17_8_gblup_variances_all, "T17_8_gblup_variances_all_3001_4000.rds")
saveRDS(T17_8_gblup_prediction_all, "T17_8_gblup_prediction_all_3001_4000.rds")
saveRDS(T17_8_gfblup_variances_all, "T17_8_gfblup_variances_all_3001_4000.rds")
saveRDS(T17_8_gfblup_prediction_all, "T17_8_gfblup_prediction_all_3001_4000.rds")
saveRDS(T17_8_gfblup_validate_all, "T17_8_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T17_8_4001_5000.R b/code/using_GO/pla/T17_8_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..343dbb0d7faf42f8de3ccc856a54b16e30516c7d
# --- /dev/null
# +++ b/code/using_GO/pla/T17_8_4001_5000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Relative output file names used by saveRDS() resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
# Phenotype objects; load() restores them under the names they were saved with.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term inputs.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297, excluding GO terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297, excluding GO terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297, excluding GO terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297, excluding GO terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297, excluding GO terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297, excluding GO terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297, excluding GO terms with zero markers
########################################################################################
# Filter: keep GO terms with at least one marker, then take this script's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]

###########################################################################################
cycles <- 10
# Fold assignments are read from the GBLUP run instead of being re-sampled here.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for that cycle fails.
T17_8_gblup_variances_all <- rep(list(list()), cycles)
T17_8_gblup_prediction_all <- rep(list(list()), cycles)
T17_8_gfblup_variances_all <- rep(list(list()), cycles)
T17_8_gfblup_prediction_all <- rep(list(list()), cycles)
T17_8_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T17_8
# NOTE(review): formula kept verbatim; it presumably reduces to an
# intercept-only design (`y ~ 1`) -- confirm before simplifying.
fm <- y ~ 1*y
X <- model.matrix(fm)
# Placeholder stored whenever a model fit fails.
failed_fit <- list(var = list(), pred = list())

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The unused fold sampling (time-based seed, folds_index, empty loop) was
  # dropped: the folds actually used are the stored ones for cycle r.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla) / n_folds),
                     byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T17_8...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The handler returns the fallback value: assignments made inside a handler
  # function only touch its local scope and never reach the result lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      failed_fit
    }
  )
  T17_8_gblup_variances_all[r] <- list(gblup_fit[["var"]])
  T17_8_gblup_prediction_all[r] <- list(gblup_fit[["pred"]])
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Feature GRM of GO term k together with its complementary (rest) GRM.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # Both fits are reset together on failure, as the original handler intended.
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        failed_fit
      }
    )
    # `[<-` with a wrapped value keeps slot k even if a fit were NULL.
    gf_var[k] <- list(gf_fit[["var"]])
    gf_pred[k] <- list(gf_fit[["pred"]])
  }
  T17_8_gfblup_variances_all[[r]] <- gf_var
  T17_8_gfblup_prediction_all[[r]] <- gf_pred
  T17_8_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T17_8_gblup_variances_all, "T17_8_gblup_variances_all_4001_5000.rds")
saveRDS(T17_8_gblup_prediction_all, "T17_8_gblup_prediction_all_4001_5000.rds")
saveRDS(T17_8_gfblup_variances_all, "T17_8_gfblup_variances_all_4001_5000.rds")
saveRDS(T17_8_gfblup_prediction_all, "T17_8_gfblup_prediction_all_4001_5000.rds")
saveRDS(T17_8_gfblup_validate_all, "T17_8_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T17_8_5001_6000.R b/code/using_GO/pla/T17_8_5001_6000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..0cef780146ba019f9513bb5f34d3bc443ab38763
# --- /dev/null
# +++ b/code/using_GO/pla/T17_8_5001_6000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Relative output file names used by saveRDS() resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under the names they were saved with.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297, excluding GO terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297, excluding GO terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297, excluding GO terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297, excluding GO terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297, excluding GO terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297, excluding GO terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297, excluding GO terms with zero markers
########################################################################################
# Filter: keep GO terms with at least one marker, then take this script's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]

###########################################################################################
cycles <- 10
# Fold assignments are read from the GBLUP run instead of being re-sampled here.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for that cycle fails.
T17_8_gblup_variances_all <- rep(list(list()), cycles)
T17_8_gblup_prediction_all <- rep(list(list()), cycles)
T17_8_gfblup_variances_all <- rep(list(list()), cycles)
T17_8_gfblup_prediction_all <- rep(list(list()), cycles)
T17_8_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T17_8
# NOTE(review): formula kept verbatim; it presumably reduces to an
# intercept-only design (`y ~ 1`) -- confirm before simplifying.
fm <- y ~ 1*y
X <- model.matrix(fm)
# Placeholder stored whenever a model fit fails.
failed_fit <- list(var = list(), pred = list())

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The unused fold sampling (time-based seed, folds_index, empty loop) was
  # dropped: the folds actually used are the stored ones for cycle r.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla) / n_folds),
                     byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T17_8...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The handler returns the fallback value: assignments made inside a handler
  # function only touch its local scope and never reach the result lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      failed_fit
    }
  )
  T17_8_gblup_variances_all[r] <- list(gblup_fit[["var"]])
  T17_8_gblup_prediction_all[r] <- list(gblup_fit[["pred"]])
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Feature GRM of GO term k together with its complementary (rest) GRM.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # Both fits are reset together on failure, as the original handler intended.
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        failed_fit
      }
    )
    # `[<-` with a wrapped value keeps slot k even if a fit were NULL.
    gf_var[k] <- list(gf_fit[["var"]])
    gf_pred[k] <- list(gf_fit[["pred"]])
  }
  T17_8_gfblup_variances_all[[r]] <- gf_var
  T17_8_gfblup_prediction_all[[r]] <- gf_pred
  T17_8_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T17_8_gblup_variances_all, "T17_8_gblup_variances_all_5001_6000.rds")
saveRDS(T17_8_gblup_prediction_all, "T17_8_gblup_prediction_all_5001_6000.rds")
saveRDS(T17_8_gfblup_variances_all, "T17_8_gfblup_variances_all_5001_6000.rds")
saveRDS(T17_8_gfblup_prediction_all, "T17_8_gfblup_prediction_all_5001_6000.rds")
saveRDS(T17_8_gfblup_validate_all, "T17_8_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T17_8_6001_7297.R b/code/using_GO/pla/T17_8_6001_7297.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..a4331119f9610956ae6845ab1c7eb8741662db89
# --- /dev/null
# +++ b/code/using_GO/pla/T17_8_6001_7297.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Relative output file names used by saveRDS() resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under the names they were saved with.
# NOTE(review): `G` and `pheno_df_pla`, used below, presumably come from these .Rdata files -- confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297, excluding GO terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297, excluding GO terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297, excluding GO terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297, excluding GO terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297, excluding GO terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297, excluding GO terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297, excluding GO terms with zero markers
########################################################################################
# Filter: keep GO terms with at least one marker, then take this script's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[6001:7297]
rGF_filtered <- rGF_filtered[6001:7297]

###########################################################################################
cycles <- 10
# Fold assignments are read from the GBLUP run instead of being re-sampled here.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for that cycle fails.
T17_8_gblup_variances_all <- rep(list(list()), cycles)
T17_8_gblup_prediction_all <- rep(list(list()), cycles)
T17_8_gfblup_variances_all <- rep(list(list()), cycles)
T17_8_gfblup_prediction_all <- rep(list(list()), cycles)
T17_8_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- pheno_df_pla$T17_8
# NOTE(review): formula kept verbatim; it presumably reduces to an
# intercept-only design (`y ~ 1`) -- confirm before simplifying.
fm <- y ~ 1*y
X <- model.matrix(fm)
# Placeholder stored whenever a model fit fails.
failed_fit <- list(var = list(), pred = list())

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The unused fold sampling (time-based seed, folds_index, empty loop) was
  # dropped: the folds actually used are the stored ones for cycle r.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla) / n_folds),
                     byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T17_8...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The handler returns the fallback value: assignments made inside a handler
  # function only touch its local scope and never reach the result lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      failed_fit
    }
  )
  T17_8_gblup_variances_all[r] <- list(gblup_fit[["var"]])
  T17_8_gblup_prediction_all[r] <- list(gblup_fit[["pred"]])
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Feature GRM of GO term k together with its complementary (rest) GRM.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # Both fits are reset together on failure, as the original handler intended.
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        failed_fit
      }
    )
    # `[<-` with a wrapped value keeps slot k even if a fit were NULL.
    gf_var[k] <- list(gf_fit[["var"]])
    gf_pred[k] <- list(gf_fit[["pred"]])
  }
  T17_8_gfblup_variances_all[[r]] <- gf_var
  T17_8_gfblup_prediction_all[[r]] <- gf_pred
  T17_8_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T17_8_gblup_variances_all, "T17_8_gblup_variances_all_6001_7297.rds")
saveRDS(T17_8_gblup_prediction_all, "T17_8_gblup_prediction_all_6001_7297.rds")
saveRDS(T17_8_gfblup_variances_all, "T17_8_gfblup_variances_all_6001_7297.rds")
saveRDS(T17_8_gfblup_prediction_all, "T17_8_gfblup_prediction_all_6001_7297.rds")
saveRDS(T17_8_gfblup_validate_all, "T17_8_gfblup_validate_all_6001_7297.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T18_1_1001_2000.R b/code/using_GO/pla/T18_1_1001_2000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..e395a94945591c4132e3115e80d431661c6bb029
# --- /dev/null
# +++ b/code/using_GO/pla/T18_1_1001_2000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
trim <-
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") 
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + 
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_1_gblup_variances_all=rep(list(list()),cycles) +T18_1_gblup_prediction_all=rep(list(list()),cycles) +T18_1_gfblup_variances_all=rep(list(list()),cycles) +T18_1_gfblup_prediction_all=rep(list(list()),cycles) +T18_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_1...") + y=pheno_df_pla$T18_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_1_gblup_variances_all[[r]]<-var + T18_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_1_gblup_variances_all[[r]]<-list() + T18_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_1_gfblup_variances_all[[r]]<-var + T18_1_gfblup_prediction_all[[r]]<-pred + T18_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_1_gblup_variances_all,"T18_1_gblup_variances_all_1001_2000.rds") +saveRDS(T18_1_gblup_prediction_all,"T18_1_gblup_prediction_all_1001_2000.rds") +saveRDS(T18_1_gfblup_variances_all,"T18_1_gfblup_variances_all_1001_2000.rds") +saveRDS(T18_1_gfblup_prediction_all,"T18_1_gfblup_prediction_all_1001_2000.rds") +saveRDS(T18_1_gfblup_validate_all,"T18_1_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_1_1_1000.R b/code/using_GO/pla/T18_1_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..aa48c3ceb48f7a348b13f9f972a901c7c4ad302b --- /dev/null +++ b/code/using_GO/pla/T18_1_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") 
+#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero 
markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_1_gblup_variances_all=rep(list(list()),cycles) +T18_1_gblup_prediction_all=rep(list(list()),cycles) +T18_1_gfblup_variances_all=rep(list(list()),cycles) +T18_1_gfblup_prediction_all=rep(list(list()),cycles) +T18_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_1...") + y=pheno_df_pla$T18_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_1_gblup_variances_all[[r]]<-var + T18_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_1_gblup_variances_all[[r]]<-list() + T18_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_1_gfblup_variances_all[[r]]<-var + T18_1_gfblup_prediction_all[[r]]<-pred + T18_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_1_gblup_variances_all,"T18_1_gblup_variances_all_1_1000.rds") +saveRDS(T18_1_gblup_prediction_all,"T18_1_gblup_prediction_all_1_1000.rds") +saveRDS(T18_1_gfblup_variances_all,"T18_1_gfblup_variances_all_1_1000.rds") +saveRDS(T18_1_gfblup_prediction_all,"T18_1_gfblup_prediction_all_1_1000.rds") +saveRDS(T18_1_gfblup_validate_all,"T18_1_gfblup_validate_all_1_1000.rds") + 
+################################################################################################################################## diff --git a/code/using_GO/pla/T18_1_2001_3000.R b/code/using_GO/pla/T18_1_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..8cc1b225c70b18b4ea4d1a60c36ade177b0cabf5 --- /dev/null +++ b/code/using_GO/pla/T18_1_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] 
+rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_1_gblup_variances_all=rep(list(list()),cycles) +T18_1_gblup_prediction_all=rep(list(list()),cycles) +T18_1_gfblup_variances_all=rep(list(list()),cycles) +T18_1_gfblup_prediction_all=rep(list(list()),cycles) +T18_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_1...") + y=pheno_df_pla$T18_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_1_gblup_variances_all[[r]]<-var + T18_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_1_gblup_variances_all[[r]]<-list() + T18_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred 
<-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_1_gfblup_variances_all[[r]]<-var + T18_1_gfblup_prediction_all[[r]]<-pred + T18_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_1_gblup_variances_all,"T18_1_gblup_variances_all_2001_3000.rds") +saveRDS(T18_1_gblup_prediction_all,"T18_1_gblup_prediction_all_2001_3000.rds") +saveRDS(T18_1_gfblup_variances_all,"T18_1_gfblup_variances_all_2001_3000.rds") +saveRDS(T18_1_gfblup_prediction_all,"T18_1_gfblup_prediction_all_2001_3000.rds") +saveRDS(T18_1_gfblup_validate_all,"T18_1_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_1_3001_4000.R b/code/using_GO/pla/T18_1_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..bbd0e59db229f025f5288d00699117ecc9d72785 --- /dev/null +++ b/code/using_GO/pla/T18_1_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + 
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_1_gblup_variances_all=rep(list(list()),cycles) +T18_1_gblup_prediction_all=rep(list(list()),cycles) +T18_1_gfblup_variances_all=rep(list(list()),cycles) +T18_1_gfblup_prediction_all=rep(list(list()),cycles) +T18_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + 
message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_1...") + y=pheno_df_pla$T18_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_1_gblup_variances_all[[r]]<-var + T18_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_1_gblup_variances_all[[r]]<-list() + T18_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_1_gfblup_variances_all[[r]]<-var + T18_1_gfblup_prediction_all[[r]]<-pred + T18_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} 
saveRDS(T18_1_gblup_variances_all, "T18_1_gblup_variances_all_3001_4000.rds")
saveRDS(T18_1_gblup_prediction_all, "T18_1_gblup_prediction_all_3001_4000.rds")
saveRDS(T18_1_gfblup_variances_all, "T18_1_gfblup_variances_all_3001_4000.rds")
saveRDS(T18_1_gfblup_prediction_all, "T18_1_gfblup_prediction_all_3001_4000.rds")
saveRDS(T18_1_gfblup_validate_all, "T18_1_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
diff --git a/code/using_GO/pla/T18_1_4001_5000.R b/code/using_GO/pla/T18_1_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..0020d413a1a8cf7e983657610686edfdb20fb87c --- /dev/null +++ b/code/using_GO/pla/T18_1_4001_5000.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated version of %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

# All saveRDS() calls at the end of this script use relative file names, so the
# results are written into this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
# The load order is kept as is: load() restores objects by name and could
# overwrite anything created earlier.
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
########################################################################################
# filter: keep GO terms with at least one marker, then take this script's slice
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
kept_terms <- names(go_all_markers_filtered)
GF_filtered <- GF[kept_terms][4001:5000]
rGF_filtered <- rGF[kept_terms][4001:5000]

###########################################################################################
cycles <- 10
# Validation folds are read from a file stored under the GBLUP output directory
# instead of being sampled here, so no random number generation is needed.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot holds an empty list when the fit for it failed.
T18_1_gblup_variances_all <- rep(list(list()), cycles)
T18_1_gblup_prediction_all <- rep(list(list()), cycles)
T18_1_gfblup_variances_all <- rep(list(list()), cycles)
T18_1_gfblup_prediction_all <- rep(list(list()), cycles)
T18_1_gfblup_validate_all <- rep(list(list()), cycles)

# Run greml() twice for one set of relationship matrices: once without and once
# with the validation folds. Returns list(var = , pred = ). When either call
# fails the error message is reported and BOTH elements are empty lists, so a
# failed fit can never leave a half-filled result slot. (Assigning to the result
# lists from inside an error handler would only change a local copy.)
fit_greml_pair <- function(y, X, GRM, validate) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("greml failed: ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

# Phenotype and design matrix do not depend on the cycle, so build them once.
# NOTE(review): `y ~ 1*y` also puts the response on the right-hand side; if an
# intercept-only design is intended, `y ~ 1` would say so directly - confirm
# before changing.
y <- pheno_df_pla$T18_1
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Stored fold indices of cycle r, reshaped to round(nrow / n_folds) rows.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T18_1...")

  # GBLUP: the single relationship matrix G.
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate)
  T18_1_gblup_variances_all[[r]] <- gblup_fit$var
  T18_1_gblup_prediction_all[[r]] <- gblup_fit$pred

  # GFBLUP: one fit per GO term, combining the k-th entries of GF_filtered and
  # rGF_filtered.
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate)
    gf_var[[k]] <- gf_fit$var
    gf_pred[[k]] <- gf_fit$pred
  }
  T18_1_gfblup_variances_all[[r]] <- gf_var
  T18_1_gfblup_prediction_all[[r]] <- gf_pred
  T18_1_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T18_1_gblup_variances_all, "T18_1_gblup_variances_all_4001_5000.rds")
saveRDS(T18_1_gblup_prediction_all, "T18_1_gblup_prediction_all_4001_5000.rds")
saveRDS(T18_1_gfblup_variances_all, "T18_1_gfblup_variances_all_4001_5000.rds")
saveRDS(T18_1_gfblup_prediction_all, "T18_1_gfblup_prediction_all_4001_5000.rds")
saveRDS(T18_1_gfblup_validate_all, "T18_1_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################
diff --git a/code/using_GO/pla/T18_1_5001_6000.R b/code/using_GO/pla/T18_1_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..21fb218cd14e6a2b0fef80e340f7c9d23f53ad67 --- /dev/null +++ b/code/using_GO/pla/T18_1_5001_6000.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated version of %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
# The load order is kept as is: load() restores objects by name and could
# overwrite anything created earlier.
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
########################################################################################
# filter: keep GO terms with at least one marker, then take this script's slice
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
kept_terms <- names(go_all_markers_filtered)
GF_filtered <- GF[kept_terms][5001:6000]
rGF_filtered <- rGF[kept_terms][5001:6000]

###########################################################################################
# Number of cross-validation cycles run below.
cycles <- 10
# Validation folds are read from a file stored under the GBLUP output directory
# instead of being sampled here, so no random number generation is needed.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot holds an empty list when the fit for it failed.
T18_1_gblup_variances_all <- rep(list(list()), cycles)
T18_1_gblup_prediction_all <- rep(list(list()), cycles)
T18_1_gfblup_variances_all <- rep(list(list()), cycles)
T18_1_gfblup_prediction_all <- rep(list(list()), cycles)
T18_1_gfblup_validate_all <- rep(list(list()), cycles)

# Run greml() twice for one set of relationship matrices: once without and once
# with the validation folds. Returns list(var = , pred = ). When either call
# fails the error message is reported and BOTH elements are empty lists, so a
# failed fit can never leave a half-filled result slot. (Assigning to the result
# lists from inside an error handler would only change a local copy.)
fit_greml_pair <- function(y, X, GRM, validate) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("greml failed: ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

# Phenotype and design matrix do not depend on the cycle, so build them once.
# NOTE(review): `y ~ 1*y` also puts the response on the right-hand side; if an
# intercept-only design is intended, `y ~ 1` would say so directly - confirm
# before changing.
y <- pheno_df_pla$T18_1
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Stored fold indices of cycle r, reshaped to round(nrow / n_folds) rows.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T18_1...")

  # GBLUP: the single relationship matrix G.
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate)
  T18_1_gblup_variances_all[[r]] <- gblup_fit$var
  T18_1_gblup_prediction_all[[r]] <- gblup_fit$pred

  # GFBLUP: one fit per GO term, combining the k-th entries of GF_filtered and
  # rGF_filtered.
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate)
    gf_var[[k]] <- gf_fit$var
    gf_pred[[k]] <- gf_fit$pred
  }
  T18_1_gfblup_variances_all[[r]] <- gf_var
  T18_1_gfblup_prediction_all[[r]] <- gf_pred
  T18_1_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T18_1_gblup_variances_all, "T18_1_gblup_variances_all_5001_6000.rds")
saveRDS(T18_1_gblup_prediction_all, "T18_1_gblup_prediction_all_5001_6000.rds")
saveRDS(T18_1_gfblup_variances_all, "T18_1_gfblup_variances_all_5001_6000.rds")
saveRDS(T18_1_gfblup_prediction_all, "T18_1_gfblup_prediction_all_5001_6000.rds")
saveRDS(T18_1_gfblup_validate_all, "T18_1_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################
diff --git a/code/using_GO/pla/T18_1_6001_7297.R b/code/using_GO/pla/T18_1_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..f9bbf65690548739a0759054cd35fe5065696bf5 --- /dev/null +++ b/code/using_GO/pla/T18_1_6001_7297.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated version of %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
# The load order is kept as is: load() restores objects by name and could
# overwrite anything created earlier.
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
########################################################################################
# filter: keep GO terms with at least one marker, then take this script's slice
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
kept_terms <- names(go_all_markers_filtered)
GF_filtered <- GF[kept_terms][6001:7297]
rGF_filtered <- rGF[kept_terms][6001:7297]

###########################################################################################
cycles <- 10
# Validation folds are read from a file stored under the GBLUP output directory
# instead of being sampled here, so no random number generation is needed.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot holds an empty list when the fit for it failed.
T18_1_gblup_variances_all <- rep(list(list()), cycles)
T18_1_gblup_prediction_all <- rep(list(list()), cycles)
T18_1_gfblup_variances_all <- rep(list(list()), cycles)
T18_1_gfblup_prediction_all <- rep(list(list()), cycles)
T18_1_gfblup_validate_all <- rep(list(list()), cycles)

# Run greml() twice for one set of relationship matrices: once without and once
# with the validation folds. Returns list(var = , pred = ). When either call
# fails the error message is reported and BOTH elements are empty lists, so a
# failed fit can never leave a half-filled result slot. (Assigning to the result
# lists from inside an error handler would only change a local copy.)
fit_greml_pair <- function(y, X, GRM, validate) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("greml failed: ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

# Phenotype and design matrix do not depend on the cycle, so build them once.
# NOTE(review): `y ~ 1*y` also puts the response on the right-hand side; if an
# intercept-only design is intended, `y ~ 1` would say so directly - confirm
# before changing.
y <- pheno_df_pla$T18_1
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Stored fold indices of cycle r, reshaped to round(nrow / n_folds) rows.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T18_1...")

  # GBLUP: the single relationship matrix G.
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate)
  T18_1_gblup_variances_all[[r]] <- gblup_fit$var
  T18_1_gblup_prediction_all[[r]] <- gblup_fit$pred

  # GFBLUP: one fit per GO term, combining the k-th entries of GF_filtered and
  # rGF_filtered.
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate)
    gf_var[[k]] <- gf_fit$var
    gf_pred[[k]] <- gf_fit$pred
  }
  T18_1_gfblup_variances_all[[r]] <- gf_var
  T18_1_gfblup_prediction_all[[r]] <- gf_pred
  T18_1_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T18_1_gblup_variances_all, "T18_1_gblup_variances_all_6001_7297.rds")
saveRDS(T18_1_gblup_prediction_all, "T18_1_gblup_prediction_all_6001_7297.rds")
saveRDS(T18_1_gfblup_variances_all, "T18_1_gfblup_variances_all_6001_7297.rds")
saveRDS(T18_1_gfblup_prediction_all, "T18_1_gfblup_prediction_all_6001_7297.rds")
saveRDS(T18_1_gfblup_validate_all, "T18_1_gfblup_validate_all_6001_7297.rds")

##################################################################################################################################
diff --git a/code/using_GO/pla/T18_2_1001_2000.R b/code/using_GO/pla/T18_2_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..49d59796cd38ce9902a9bf6e18895c393d8e2b76 --- /dev/null +++ b/code/using_GO/pla/T18_2_1001_2000.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated version of %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
# The load order is kept as is: load() restores objects by name and could
# overwrite anything created earlier.
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
########################################################################################
# filter: keep GO terms with at least one marker, then take this script's slice
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
kept_terms <- names(go_all_markers_filtered)
GF_filtered <- GF[kept_terms][1001:2000]
rGF_filtered <- rGF[kept_terms][1001:2000]

###########################################################################################
cycles <- 10
# Validation folds are read from a file stored under the GBLUP output directory
# instead of being sampled here, so no random number generation is needed.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot holds an empty list when the fit for it failed.
T18_2_gblup_variances_all <- rep(list(list()), cycles)
T18_2_gblup_prediction_all <- rep(list(list()), cycles)
T18_2_gfblup_variances_all <- rep(list(list()), cycles)
T18_2_gfblup_prediction_all <- rep(list(list()), cycles)
T18_2_gfblup_validate_all <- rep(list(list()), cycles)

# Run greml() twice for one set of relationship matrices: once without and once
# with the validation folds. Returns list(var = , pred = ). When either call
# fails the error message is reported and BOTH elements are empty lists, so a
# failed fit can never leave a half-filled result slot. (Assigning to the result
# lists from inside an error handler would only change a local copy.)
fit_greml_pair <- function(y, X, GRM, validate) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("greml failed: ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

# Phenotype and design matrix do not depend on the cycle, so build them once.
# NOTE(review): `y ~ 1*y` also puts the response on the right-hand side; if an
# intercept-only design is intended, `y ~ 1` would say so directly - confirm
# before changing.
y <- pheno_df_pla$T18_2
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Stored fold indices of cycle r, reshaped to round(nrow / n_folds) rows.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T18_2...")

  # GBLUP: the single relationship matrix G.
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate)
  T18_2_gblup_variances_all[[r]] <- gblup_fit$var
  T18_2_gblup_prediction_all[[r]] <- gblup_fit$pred

  # GFBLUP: one fit per GO term, combining the k-th entries of GF_filtered and
  # rGF_filtered.
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate)
    gf_var[[k]] <- gf_fit$var
    gf_pred[[k]] <- gf_fit$pred
  }
  T18_2_gfblup_variances_all[[r]] <- gf_var
  T18_2_gfblup_prediction_all[[r]] <- gf_pred
  T18_2_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T18_2_gblup_variances_all, "T18_2_gblup_variances_all_1001_2000.rds")
saveRDS(T18_2_gblup_prediction_all, "T18_2_gblup_prediction_all_1001_2000.rds")
saveRDS(T18_2_gfblup_variances_all, "T18_2_gfblup_variances_all_1001_2000.rds")
saveRDS(T18_2_gfblup_prediction_all, "T18_2_gfblup_prediction_all_1001_2000.rds")
saveRDS(T18_2_gfblup_validate_all, "T18_2_gfblup_validate_all_1001_2000.rds")

##################################################################################################################################
diff --git a/code/using_GO/pla/T18_2_1_1000.R b/code/using_GO/pla/T18_2_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..a33fe9d6dffd3d5405c7614164318516a1a2d199 --- /dev/null +++ b/code/using_GO/pla/T18_2_1_1000.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated version of %in%.
`%not_in%` <- purrr::negate(`%in%`)
#
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
# The load order is kept as is: load() restores objects by name and could
# overwrite anything created earlier.
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
########################################################################################
# filter: keep GO terms with at least one marker, then take this script's slice
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
kept_terms <- names(go_all_markers_filtered)
GF_filtered <- GF[kept_terms][1:1000]
rGF_filtered <- rGF[kept_terms][1:1000]

###########################################################################################
# Number of cross-validation cycles run below.
cycles <- 10
# Fold indices are read from a file stored under the GBLUP output directory.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# First of the per-cycle result lists (one empty-list slot per cycle).
T18_2_gblup_variances_all <- rep(list(list()), cycles)
+T18_2_gblup_prediction_all=rep(list(list()),cycles) +T18_2_gfblup_variances_all=rep(list(list()),cycles) +T18_2_gfblup_prediction_all=rep(list(list()),cycles) +T18_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_2...") + y=pheno_df_pla$T18_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_2_gblup_variances_all[[r]]<-var + T18_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_2_gblup_variances_all[[r]]<-list() + T18_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends 
+  }
+  T18_2_gfblup_variances_all[[r]]<-var
+  T18_2_gfblup_prediction_all[[r]]<-pred
+  T18_2_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T18_2_gblup_variances_all,"T18_2_gblup_variances_all_1_1000.rds")
+saveRDS(T18_2_gblup_prediction_all,"T18_2_gblup_prediction_all_1_1000.rds")
+saveRDS(T18_2_gfblup_variances_all,"T18_2_gfblup_variances_all_1_1000.rds")
+saveRDS(T18_2_gfblup_prediction_all,"T18_2_gfblup_prediction_all_1_1000.rds")
+saveRDS(T18_2_gfblup_validate_all,"T18_2_gfblup_validate_all_1_1000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T18_2_2001_3000.R b/code/using_GO/pla/T18_2_2001_3000.R
new file mode 100644
index 0000000000000000000000000000000000000000..b69edc8cab2313b7f1ef4870a12a1f850c01b0e8
--- /dev/null
+++ b/code/using_GO/pla/T18_2_2001_3000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[2001:3000]
+rGF_filtered<-rGF_filtered[2001:3000]
+
+###########################################################################################
+cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T18_2_gblup_variances_all=rep(list(list()),cycles)
+T18_2_gblup_prediction_all=rep(list(list()),cycles)
+T18_2_gfblup_variances_all=rep(list(list()),cycles)
+T18_2_gfblup_prediction_all=rep(list(list()),cycles)
+T18_2_gfblup_validate_all=rep(list(list()),cycles)
+
+for (r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Each cycle fits GBLUP and per-GO-term GFBLUP with greml() on the same folds.
+  # Folds come from gblup_validate (loaded above); nothing is resampled here.
+  validate <- matrix(unlist(gblup_validate[[r]]), nrow = round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message("T18_2...")
+  y <- pheno_df_pla$T18_2
+  # NOTE(review): the response y also appears on the right-hand side of this
+  # formula; presumably an intercept-only design (y ~ 1) is intended -- confirm.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits are returned from tryCatch() and stored afterwards: assignments
+  # made inside an error handler only change the handler's local copies.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+         pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+  T18_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+  T18_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gf_var <- rep(list(list()), length(GF_filtered))
+  gf_pred <- rep(list(list()), length(GF_filtered))
+  names(gf_var) <- names(GF_filtered)
+  names(gf_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
+    # A failure in either fit leaves both entries empty for this GO term.
+    gf_fit <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
+           pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO#", k, " in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    })
+    gf_var[[k]] <- gf_fit[["var"]]
+    gf_pred[[k]] <- gf_fit[["pred"]]
+  }
+  T18_2_gfblup_variances_all[[r]] <- gf_var
+  T18_2_gfblup_prediction_all[[r]] <- gf_pred
+  T18_2_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T18_2_gblup_variances_all,"T18_2_gblup_variances_all_2001_3000.rds")
+saveRDS(T18_2_gblup_prediction_all,"T18_2_gblup_prediction_all_2001_3000.rds")
+saveRDS(T18_2_gfblup_variances_all,"T18_2_gfblup_variances_all_2001_3000.rds")
+saveRDS(T18_2_gfblup_prediction_all,"T18_2_gfblup_prediction_all_2001_3000.rds")
+saveRDS(T18_2_gfblup_validate_all,"T18_2_gfblup_validate_all_2001_3000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T18_2_3001_4000.R b/code/using_GO/pla/T18_2_3001_4000.R
new file mode 100644
index 0000000000000000000000000000000000000000..a29a303ce22016aebba0daee2e10209077fe9864
--- /dev/null
+++ b/code/using_GO/pla/T18_2_3001_4000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[3001:4000]
+rGF_filtered<-rGF_filtered[3001:4000]
+
+###########################################################################################
+cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T18_2_gblup_variances_all=rep(list(list()),cycles)
+T18_2_gblup_prediction_all=rep(list(list()),cycles)
+T18_2_gfblup_variances_all=rep(list(list()),cycles)
+T18_2_gfblup_prediction_all=rep(list(list()),cycles)
+T18_2_gfblup_validate_all=rep(list(list()),cycles)
+
+for (r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Each cycle fits GBLUP and per-GO-term GFBLUP with greml() on the same folds.
+  # Folds come from gblup_validate (loaded above); nothing is resampled here.
+  validate <- matrix(unlist(gblup_validate[[r]]), nrow = round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message("T18_2...")
+  y <- pheno_df_pla$T18_2
+  # NOTE(review): the response y also appears on the right-hand side of this
+  # formula; presumably an intercept-only design (y ~ 1) is intended -- confirm.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits are returned from tryCatch() and stored afterwards: assignments
+  # made inside an error handler only change the handler's local copies.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+         pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+  T18_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+  T18_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gf_var <- rep(list(list()), length(GF_filtered))
+  gf_pred <- rep(list(list()), length(GF_filtered))
+  names(gf_var) <- names(GF_filtered)
+  names(gf_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
+    # A failure in either fit leaves both entries empty for this GO term.
+    gf_fit <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
+           pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO#", k, " in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    })
+    gf_var[[k]] <- gf_fit[["var"]]
+    gf_pred[[k]] <- gf_fit[["pred"]]
+  }
+  T18_2_gfblup_variances_all[[r]] <- gf_var
+  T18_2_gfblup_prediction_all[[r]] <- gf_pred
+  T18_2_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T18_2_gblup_variances_all,"T18_2_gblup_variances_all_3001_4000.rds")
+saveRDS(T18_2_gblup_prediction_all,"T18_2_gblup_prediction_all_3001_4000.rds")
+saveRDS(T18_2_gfblup_variances_all,"T18_2_gfblup_variances_all_3001_4000.rds")
+saveRDS(T18_2_gfblup_prediction_all,"T18_2_gfblup_prediction_all_3001_4000.rds")
+saveRDS(T18_2_gfblup_validate_all,"T18_2_gfblup_validate_all_3001_4000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T18_2_4001_5000.R b/code/using_GO/pla/T18_2_4001_5000.R
new file mode 100644
index 0000000000000000000000000000000000000000..239fdb6777ffa2b23dc730fefbbfb7fe447b8dcc
--- /dev/null
+++ b/code/using_GO/pla/T18_2_4001_5000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[4001:5000]
+rGF_filtered<-rGF_filtered[4001:5000]
+
+###########################################################################################
+cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T18_2_gblup_variances_all=rep(list(list()),cycles)
+T18_2_gblup_prediction_all=rep(list(list()),cycles)
+T18_2_gfblup_variances_all=rep(list(list()),cycles)
+T18_2_gfblup_prediction_all=rep(list(list()),cycles)
+T18_2_gfblup_validate_all=rep(list(list()),cycles)
+
+for (r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Each cycle fits GBLUP and per-GO-term GFBLUP with greml() on the same folds.
+  # Folds come from gblup_validate (loaded above); nothing is resampled here.
+  validate <- matrix(unlist(gblup_validate[[r]]), nrow = round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message("T18_2...")
+  y <- pheno_df_pla$T18_2
+  # NOTE(review): the response y also appears on the right-hand side of this
+  # formula; presumably an intercept-only design (y ~ 1) is intended -- confirm.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits are returned from tryCatch() and stored afterwards: assignments
+  # made inside an error handler only change the handler's local copies.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+         pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+  T18_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+  T18_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gf_var <- rep(list(list()), length(GF_filtered))
+  gf_pred <- rep(list(list()), length(GF_filtered))
+  names(gf_var) <- names(GF_filtered)
+  names(gf_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
+    # A failure in either fit leaves both entries empty for this GO term.
+    gf_fit <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
+           pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO#", k, " in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    })
+    gf_var[[k]] <- gf_fit[["var"]]
+    gf_pred[[k]] <- gf_fit[["pred"]]
+  }
+  T18_2_gfblup_variances_all[[r]] <- gf_var
+  T18_2_gfblup_prediction_all[[r]] <- gf_pred
+  T18_2_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T18_2_gblup_variances_all,"T18_2_gblup_variances_all_4001_5000.rds")
+saveRDS(T18_2_gblup_prediction_all,"T18_2_gblup_prediction_all_4001_5000.rds")
+saveRDS(T18_2_gfblup_variances_all,"T18_2_gfblup_variances_all_4001_5000.rds")
+saveRDS(T18_2_gfblup_prediction_all,"T18_2_gfblup_prediction_all_4001_5000.rds")
+saveRDS(T18_2_gfblup_validate_all,"T18_2_gfblup_validate_all_4001_5000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T18_2_5001_6000.R b/code/using_GO/pla/T18_2_5001_6000.R
new file mode 100644
index 0000000000000000000000000000000000000000..95143f44b9bb7945cf245feaa814c9bedc53590c
--- /dev/null
+++ b/code/using_GO/pla/T18_2_5001_6000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[5001:6000]
+rGF_filtered<-rGF_filtered[5001:6000]
+
+###########################################################################################
+cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
+gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T18_2_gblup_variances_all=rep(list(list()),cycles)
+T18_2_gblup_prediction_all=rep(list(list()),cycles)
+T18_2_gfblup_variances_all=rep(list(list()),cycles)
+T18_2_gfblup_prediction_all=rep(list(list()),cycles)
+T18_2_gfblup_validate_all=rep(list(list()),cycles)
+
+for (r in seq_len(cycles))
+{
+  message(paste("cycle#",r,"..."))
+  # Each cycle fits GBLUP and per-GO-term GFBLUP with greml() on the same folds.
+  # Folds come from gblup_validate (loaded above); nothing is resampled here.
+  validate <- matrix(unlist(gblup_validate[[r]]), nrow = round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message("T18_2...")
+  y <- pheno_df_pla$T18_2
+  # NOTE(review): the response y also appears on the right-hand side of this
+  # formula; presumably an intercept-only design (y ~ 1) is intended -- confirm.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits are returned from tryCatch() and stored afterwards: assignments
+  # made inside an error handler only change the handler's local copies.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+         pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+  T18_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+  T18_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gf_var <- rep(list(list()), length(GF_filtered))
+  gf_pred <- rep(list(list()), length(GF_filtered))
+  names(gf_var) <- names(GF_filtered)
+  names(gf_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
+    # A failure in either fit leaves both entries empty for this GO term.
+    gf_fit <- tryCatch({
+      list(var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
+           pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1))
+    }, error = function(e) {
+      message("GFBLUP failed for GO#", k, " in cycle ", r, ": ", conditionMessage(e))
+      list(var = list(), pred = list())
+    })
+    gf_var[[k]] <- gf_fit[["var"]]
+    gf_pred[[k]] <- gf_fit[["pred"]]
+  }
+  T18_2_gfblup_variances_all[[r]] <- gf_var
+  T18_2_gfblup_prediction_all[[r]] <- gf_pred
+  T18_2_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T18_2_gblup_variances_all,"T18_2_gblup_variances_all_5001_6000.rds")
+saveRDS(T18_2_gblup_prediction_all,"T18_2_gblup_prediction_all_5001_6000.rds")
+saveRDS(T18_2_gfblup_variances_all,"T18_2_gfblup_variances_all_5001_6000.rds")
+saveRDS(T18_2_gfblup_prediction_all,"T18_2_gfblup_prediction_all_5001_6000.rds")
+saveRDS(T18_2_gfblup_validate_all,"T18_2_gfblup_validate_all_5001_6000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T18_2_6001_7297.R b/code/using_GO/pla/T18_2_6001_7297.R
new file mode 100644
index 0000000000000000000000000000000000000000..8d7216d25c2814e6424316f2839dbf19a1a9c434
--- /dev/null
+++ b/code/using_GO/pla/T18_2_6001_7297.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[6001:7297] +rGF_filtered<-rGF_filtered[6001:7297] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + 
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_2_gblup_variances_all=rep(list(list()),cycles) +T18_2_gblup_prediction_all=rep(list(list()),cycles) +T18_2_gfblup_variances_all=rep(list(list()),cycles) +T18_2_gfblup_prediction_all=rep(list(list()),cycles) +T18_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_2...") + y=pheno_df_pla$T18_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_2_gblup_variances_all[[r]]<-var + T18_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_2_gblup_variances_all[[r]]<-list() + T18_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_2_gfblup_variances_all[[r]]<-var + T18_2_gfblup_prediction_all[[r]]<-pred + T18_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_2_gblup_variances_all,"T18_2_gblup_variances_all_6001_7297.rds") +saveRDS(T18_2_gblup_prediction_all,"T18_2_gblup_prediction_all_6001_7297.rds") +saveRDS(T18_2_gfblup_variances_all,"T18_2_gfblup_variances_all_6001_7297.rds") +saveRDS(T18_2_gfblup_prediction_all,"T18_2_gfblup_prediction_all_6001_7297.rds") +saveRDS(T18_2_gfblup_validate_all,"T18_2_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_3_1001_2000.R b/code/using_GO/pla/T18_3_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..5e541d70ee4202343ed808b6cc48c87b73f5a749 --- /dev/null +++ b/code/using_GO/pla/T18_3_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# 
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
#7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_3_gblup_variances_all=rep(list(list()),cycles) +T18_3_gblup_prediction_all=rep(list(list()),cycles) +T18_3_gfblup_variances_all=rep(list(list()),cycles) +T18_3_gfblup_prediction_all=rep(list(list()),cycles) +T18_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + 
#validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_3...") + y=pheno_df_pla$T18_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_3_gblup_variances_all[[r]]<-var + T18_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_3_gblup_variances_all[[r]]<-list() + T18_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_3_gfblup_variances_all[[r]]<-var + T18_3_gfblup_prediction_all[[r]]<-pred + T18_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_3_gblup_variances_all,"T18_3_gblup_variances_all_1001_2000.rds") +saveRDS(T18_3_gblup_prediction_all,"T18_3_gblup_prediction_all_1001_2000.rds") +saveRDS(T18_3_gfblup_variances_all,"T18_3_gfblup_variances_all_1001_2000.rds") +saveRDS(T18_3_gfblup_prediction_all,"T18_3_gfblup_prediction_all_1001_2000.rds") 
+saveRDS(T18_3_gfblup_validate_all,"T18_3_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_3_1_1000.R b/code/using_GO/pla/T18_3_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..5a282084c814e34ea42088524b2f629ea3701672 --- /dev/null +++ b/code/using_GO/pla/T18_3_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") 
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) 
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_3_gblup_variances_all=rep(list(list()),cycles) +T18_3_gblup_prediction_all=rep(list(list()),cycles) +T18_3_gfblup_variances_all=rep(list(list()),cycles) +T18_3_gfblup_prediction_all=rep(list(list()),cycles) +T18_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_3...") + y=pheno_df_pla$T18_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_3_gblup_variances_all[[r]]<-var + T18_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_3_gblup_variances_all[[r]]<-list() + T18_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_3_gfblup_variances_all[[r]]<-var + T18_3_gfblup_prediction_all[[r]]<-pred + T18_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_3_gblup_variances_all,"T18_3_gblup_variances_all_1_1000.rds") +saveRDS(T18_3_gblup_prediction_all,"T18_3_gblup_prediction_all_1_1000.rds") +saveRDS(T18_3_gfblup_variances_all,"T18_3_gfblup_variances_all_1_1000.rds") +saveRDS(T18_3_gfblup_prediction_all,"T18_3_gfblup_prediction_all_1_1000.rds") +saveRDS(T18_3_gfblup_validate_all,"T18_3_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_3_2001_3000.R b/code/using_GO/pla/T18_3_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..d1d57df1adbda0f27d791504ad045d84b7f9b0e1 --- /dev/null +++ b/code/using_GO/pla/T18_3_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, 
installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_3_gblup_variances_all=rep(list(list()),cycles) +T18_3_gblup_prediction_all=rep(list(list()),cycles) +T18_3_gfblup_variances_all=rep(list(list()),cycles) 
+T18_3_gfblup_prediction_all=rep(list(list()),cycles) +T18_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_3...") + y=pheno_df_pla$T18_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_3_gblup_variances_all[[r]]<-var + T18_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_3_gblup_variances_all[[r]]<-list() + T18_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_3_gfblup_variances_all[[r]]<-var + T18_3_gfblup_prediction_all[[r]]<-pred + 
T18_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_3_gblup_variances_all,"T18_3_gblup_variances_all_2001_3000.rds") +saveRDS(T18_3_gblup_prediction_all,"T18_3_gblup_prediction_all_2001_3000.rds") +saveRDS(T18_3_gfblup_variances_all,"T18_3_gfblup_variances_all_2001_3000.rds") +saveRDS(T18_3_gfblup_prediction_all,"T18_3_gfblup_prediction_all_2001_3000.rds") +saveRDS(T18_3_gfblup_validate_all,"T18_3_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_3_3001_4000.R b/code/using_GO/pla/T18_3_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..72091067c772ad9cc3bdfd254fcccd23a619e522 --- /dev/null +++ b/code/using_GO/pla/T18_3_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") 
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero 
markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_3_gblup_variances_all=rep(list(list()),cycles) +T18_3_gblup_prediction_all=rep(list(list()),cycles) +T18_3_gfblup_variances_all=rep(list(list()),cycles) +T18_3_gfblup_prediction_all=rep(list(list()),cycles) +T18_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_3...") + y=pheno_df_pla$T18_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_3_gblup_variances_all[[r]]<-var + T18_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_3_gblup_variances_all[[r]]<-list() + T18_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_3_gfblup_variances_all[[r]]<-var + T18_3_gfblup_prediction_all[[r]]<-pred + T18_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_3_gblup_variances_all,"T18_3_gblup_variances_all_3001_4000.rds") +saveRDS(T18_3_gblup_prediction_all,"T18_3_gblup_prediction_all_3001_4000.rds") +saveRDS(T18_3_gfblup_variances_all,"T18_3_gfblup_variances_all_3001_4000.rds") +saveRDS(T18_3_gfblup_prediction_all,"T18_3_gfblup_prediction_all_3001_4000.rds") +saveRDS(T18_3_gfblup_validate_all,"T18_3_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_3_4001_5000.R b/code/using_GO/pla/T18_3_4001_5000.R new 
# Patch header fragment for T18_3_4001_5000.R (kept as a comment so the R below parses):
# file mode 100644 index 0000000000000000000000000000000000000000..600b5b1449fed4716bf191ef73186da6108d9649 --- /dev/null +++ b/code/using_GO/pla/T18_3_4001_5000.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# TRUE when `mypkg` is listed in the first column of installed.packages().
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

# All relative output paths used later resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Disabled one-off installation steps (left commented out as in the original).
#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores whatever objects the .Rdata file holds; `G` is used later as the GRM.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
# Phenotype workspaces; `pheno_df_pla` is used by the cross-validation loop further down.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
########################################################################################
# filter: keep entries with at least one marker, then take this script's slice 4001..5000
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
kept_terms <- names(go_all_markers_filtered)
GF_filtered <- GF[kept_terms][4001:5000]
rGF_filtered <- rGF[kept_terms][4001:5000]

###########################################################################################
# Repeated cross-validation for trait T18_3 on the GO slice held in GF_filtered.
# Each cycle fits a GBLUP model (GRM = G) and, per GO term, a GFBLUP model
# (GRM = that term's GF matrix plus its rGF counterpart) through greml().
cycles <- 10
# Validation sets are read from the GBLUP output directory, one entry per cycle.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

T18_3_gblup_variances_all <- rep(list(list()), cycles)
T18_3_gblup_prediction_all <- rep(list(list()), cycles)
T18_3_gfblup_variances_all <- rep(list(list()), cycles)
T18_3_gfblup_prediction_all <- rep(list(list()), cycles)
T18_3_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The original also drew a time-seeded random fold vector here but never used it
  # (the loop consuming it was commented out); that dead code is removed.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T18_3...")
  y <- pheno_df_pla$T18_3
  # NOTE(review): unusual formula (response also on the right-hand side); kept unchanged.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)
  n <- length(y)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The handler RETURNS the fallback. Assigning inside the handler (as before) only
  # modified a copy local to the handler function and never reached these lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T18_3_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T18_3_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # Both fits succeed or both slots fall back to list(), so a variance fit is
    # never kept next to a failed prediction for the same term.
    gfblup_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    gfblup_var[[k]] <- gfblup_fit[["var"]]
    gfblup_pred[[k]] <- gfblup_fit[["pred"]]
  }
  T18_3_gfblup_variances_all[[r]] <- gfblup_var
  T18_3_gfblup_prediction_all[[r]] <- gfblup_pred
  T18_3_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
# Results are written relative to the working directory set by setwd().
saveRDS(T18_3_gblup_variances_all, "T18_3_gblup_variances_all_4001_5000.rds")
saveRDS(T18_3_gblup_prediction_all, "T18_3_gblup_prediction_all_4001_5000.rds")
saveRDS(T18_3_gfblup_variances_all, "T18_3_gfblup_variances_all_4001_5000.rds")
saveRDS(T18_3_gfblup_prediction_all, "T18_3_gfblup_prediction_all_4001_5000.rds")
saveRDS(T18_3_gfblup_validate_all, "T18_3_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################
# Patch header for the next script (kept as a comment so the R below parses):
# diff --git a/code/using_GO/pla/T18_3_5001_6000.R b/code/using_GO/pla/T18_3_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..dddf977f5b05340725b2e912d810bb09256e1572 --- /dev/null +++ b/code/using_GO/pla/T18_3_5001_6000.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# TRUE when `mypkg` is listed in the first column of installed.packages().
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Disabled one-off installation steps (left commented out as in the original).
#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
########################################################################################
# filter: keep entries with at least one marker, then take this script's slice 5001..6000
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
kept_terms <- names(go_all_markers_filtered)
GF_filtered <- GF[kept_terms][5001:6000]
rGF_filtered <- rGF[kept_terms][5001:6000]

###########################################################################################
# Repeated cross-validation for trait T18_3 on the GO slice held in GF_filtered.
# Each cycle fits a GBLUP model (GRM = G) and, per GO term, a GFBLUP model
# (GRM = that term's GF matrix plus its rGF counterpart) through greml().
cycles <- 10
# Validation sets are read from the GBLUP output directory, one entry per cycle.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

T18_3_gblup_variances_all <- rep(list(list()), cycles)
T18_3_gblup_prediction_all <- rep(list(list()), cycles)
T18_3_gfblup_variances_all <- rep(list(list()), cycles)
T18_3_gfblup_prediction_all <- rep(list(list()), cycles)
T18_3_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The original also drew a time-seeded random fold vector here but never used it
  # (the loop consuming it was commented out); that dead code is removed.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T18_3...")
  y <- pheno_df_pla$T18_3
  # NOTE(review): unusual formula (response also on the right-hand side); kept unchanged.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)
  n <- length(y)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The handler RETURNS the fallback. Assigning inside the handler (as before) only
  # modified a copy local to the handler function and never reached these lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T18_3_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T18_3_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # Both fits succeed or both slots fall back to list(), so a variance fit is
    # never kept next to a failed prediction for the same term.
    gfblup_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    gfblup_var[[k]] <- gfblup_fit[["var"]]
    gfblup_pred[[k]] <- gfblup_fit[["pred"]]
  }
  T18_3_gfblup_variances_all[[r]] <- gfblup_var
  T18_3_gfblup_prediction_all[[r]] <- gfblup_pred
  T18_3_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
# Results are written relative to the current working directory.
saveRDS(T18_3_gblup_variances_all, "T18_3_gblup_variances_all_5001_6000.rds")
saveRDS(T18_3_gblup_prediction_all, "T18_3_gblup_prediction_all_5001_6000.rds")
# Remaining GFBLUP outputs of the 5001..6000 slice (relative to the working directory).
saveRDS(T18_3_gfblup_variances_all, file = "T18_3_gfblup_variances_all_5001_6000.rds")
saveRDS(T18_3_gfblup_prediction_all, file = "T18_3_gfblup_prediction_all_5001_6000.rds")
saveRDS(T18_3_gfblup_validate_all, file = "T18_3_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################
# Patch header for the next script (kept as a comment so the R below parses):
# diff --git a/code/using_GO/pla/T18_3_6001_7297.R b/code/using_GO/pla/T18_3_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..b2ad2ebf1ed898e34a2106abddd55e6489a42406 --- /dev/null +++ b/code/using_GO/pla/T18_3_6001_7297.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# TRUE when `mypkg` is listed in the first column of installed.packages().
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Disabled one-off installation steps (left commented out as in the original).
#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
# Remaining inputs: genotype/phenotype objects first, then the GO-derived objects.
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores whatever objects the .Rdata files hold (e.g. `G`, `pheno_df_pla` used later).
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
# 7297 excluding those go terms with zero markers
########################################################################################
# filter: keep entries with at least one marker, then take this script's slice 6001..7297
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
kept_terms <- names(go_all_markers_filtered)
GF_filtered <- GF[kept_terms][6001:7297]
rGF_filtered <- rGF[kept_terms][6001:7297]

###########################################################################################
# Repeated cross-validation for trait T18_3 on the GO slice held in GF_filtered.
# Each cycle fits a GBLUP model (GRM = G) and, per GO term, a GFBLUP model
# (GRM = that term's GF matrix plus its rGF counterpart) through greml().
cycles <- 10
# Validation sets are read from the GBLUP output directory, one entry per cycle.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

T18_3_gblup_variances_all <- rep(list(list()), cycles)
T18_3_gblup_prediction_all <- rep(list(list()), cycles)
T18_3_gfblup_variances_all <- rep(list(list()), cycles)
T18_3_gfblup_prediction_all <- rep(list(list()), cycles)
T18_3_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The original also drew a time-seeded random fold vector here but never used it
  # (the loop consuming it was commented out); that dead code is removed.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T18_3...")
  y <- pheno_df_pla$T18_3
  # NOTE(review): unusual formula (response also on the right-hand side); kept unchanged.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)
  n <- length(y)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The handler RETURNS the fallback. Assigning inside the handler (as before) only
  # modified a copy local to the handler function and never reached these lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T18_3_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T18_3_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # Both fits succeed or both slots fall back to list(), so a variance fit is
    # never kept next to a failed prediction for the same term.
    gfblup_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    gfblup_var[[k]] <- gfblup_fit[["var"]]
    gfblup_pred[[k]] <- gfblup_fit[["pred"]]
  }
  T18_3_gfblup_variances_all[[r]] <- gfblup_var
  T18_3_gfblup_prediction_all[[r]] <- gfblup_pred
  T18_3_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
# Results are written relative to the current working directory.
saveRDS(T18_3_gblup_variances_all, "T18_3_gblup_variances_all_6001_7297.rds")
saveRDS(T18_3_gblup_prediction_all, "T18_3_gblup_prediction_all_6001_7297.rds")
saveRDS(T18_3_gfblup_variances_all, "T18_3_gfblup_variances_all_6001_7297.rds")
saveRDS(T18_3_gfblup_prediction_all, "T18_3_gfblup_prediction_all_6001_7297.rds")
saveRDS(T18_3_gfblup_validate_all, "T18_3_gfblup_validate_all_6001_7297.rds")

##################################################################################################################################
# Patch header for the next script (kept as a comment so the R below parses):
# diff --git a/code/using_GO/pla/T18_4_1001_2000.R b/code/using_GO/pla/T18_4_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..f3f2b966d757b27eefb5fb7ec228c66ffa0a82e2 --- /dev/null +++ b/code/using_GO/pla/T18_4_1001_2000.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# TRUE when `mypkg` is listed in the first column of installed.packages().
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Disabled one-off installation steps (left commented out as in the original).
#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-derived inputs for the T18_4 run on slice 1001..2000.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
########################################################################################
# filter: keep entries with at least one marker, then take this script's slice 1001..2000
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
kept_terms <- names(go_all_markers_filtered)
GF_filtered <- GF[kept_terms][1001:2000]
rGF_filtered <- rGF[kept_terms][1001:2000]

###########################################################################################
# Number of cross-validation repetitions.
cycles <- 10
#gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
# Repeated cross-validation for trait T18_4 on the GO slice held in GF_filtered.
# Each cycle fits a GBLUP model (GRM = G) and, per GO term, a GFBLUP model
# (GRM = that term's GF matrix plus its rGF counterpart) through greml().
# Validation sets are read from the GBLUP output directory, one entry per cycle.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

T18_4_gblup_variances_all <- rep(list(list()), cycles)
T18_4_gblup_prediction_all <- rep(list(list()), cycles)
T18_4_gfblup_variances_all <- rep(list(list()), cycles)
T18_4_gfblup_prediction_all <- rep(list(list()), cycles)
T18_4_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The original also drew a time-seeded random fold vector here but never used it
  # (the loop consuming it was commented out); that dead code is removed.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T18_4...")
  y <- pheno_df_pla$T18_4
  # NOTE(review): unusual formula (response also on the right-hand side); kept unchanged.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)
  n <- length(y)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The handler RETURNS the fallback. Assigning inside the handler (as before) only
  # modified a copy local to the handler function and never reached these lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T18_4_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T18_4_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # Both fits succeed or both slots fall back to list(), so a variance fit is
    # never kept next to a failed prediction for the same term.
    gfblup_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    gfblup_var[[k]] <- gfblup_fit[["var"]]
    gfblup_pred[[k]] <- gfblup_fit[["pred"]]
  }
  T18_4_gfblup_variances_all[[r]] <- gfblup_var
  T18_4_gfblup_prediction_all[[r]] <- gfblup_pred
  T18_4_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
# Results are written relative to the current working directory.
saveRDS(T18_4_gblup_variances_all, "T18_4_gblup_variances_all_1001_2000.rds")
saveRDS(T18_4_gblup_prediction_all, "T18_4_gblup_prediction_all_1001_2000.rds")
saveRDS(T18_4_gfblup_variances_all, "T18_4_gfblup_variances_all_1001_2000.rds")
saveRDS(T18_4_gfblup_prediction_all, "T18_4_gfblup_prediction_all_1001_2000.rds")
saveRDS(T18_4_gfblup_validate_all, "T18_4_gfblup_validate_all_1001_2000.rds")

##################################################################################################################################
# Patch header for the next script (kept as a comment so the R below parses):
# diff --git a/code/using_GO/pla/T18_4_1_1000.R b/code/using_GO/pla/T18_4_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..287f78496dc7f57b161308fe6e0495c7b0aedb2d --- /dev/null +++ b/code/using_GO/pla/T18_4_1_1000.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# TRUE when `mypkg` is listed in the first column of installed.packages().
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# Disabled one-off installation steps (left commented out as in the original).
#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
# ---------------------------------------------------------------------------
# Trait T18_4, GO terms 1-1000.
# For each of `cycles` repetitions this script fits
#   * GBLUP : greml() with the single relationship matrix G, and
#   * GFBLUP: greml() once per GO term, with that term's GF matrix and its
#             rGF counterpart as the two GRMs,
# each both without and with the `validate` fold matrix, and saves the
# collected results as .rds files in the working directory.
# ---------------------------------------------------------------------------
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# NOTE(review): presumably defines `G`, which is used as the GBLUP GRM below -- confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
# NOTE(review): presumably defines `pheno_df_pla`, which is read below -- confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Keep only GO terms that have at least one marker, then restrict this job to
# its slice of the filtered list.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]

###########################################################################################
cycles <- 10
# Fold assignments are not re-sampled here: they are read from the file written
# by the GBLUP run, one entry per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T18_4_gblup_variances_all <- rep(list(list()), cycles)
T18_4_gblup_prediction_all <- rep(list(list()), cycles)
T18_4_gfblup_variances_all <- rep(list(list()), cycles)
T18_4_gfblup_prediction_all <- rep(list(list()), cycles)
T18_4_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Rebuild this cycle's validation matrix from the stored GBLUP folds.
  # The previous per-cycle set.seed()/sample() fold generation was dead code
  # (its result was never used) and has been dropped.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message ("T18_4...")
  y <- pheno_df_pla$T18_4
  # NOTE(review): `y ~ 1*y` puts the response on the right-hand side; presumably
  # an intercept-only design (`y ~ 1`) is intended -- confirm before changing.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The handler RETURNS the fallback value: assignments made inside a handler
  # function only modify copies local to that function.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T18_4_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T18_4_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # Both fits succeed or both entries become empty lists, so var[[k]] and
    # pred[[k]] never disagree about whether GO term k could be fitted.
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " (", names(GF_filtered)[k], "): ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    var[[k]] <- fit_k[["var"]]
    pred[[k]] <- fit_k[["pred"]]
  }
  T18_4_gfblup_variances_all[[r]] <- var
  T18_4_gfblup_prediction_all[[r]] <- pred
  T18_4_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T18_4_gblup_variances_all, "T18_4_gblup_variances_all_1_1000.rds")
saveRDS(T18_4_gblup_prediction_all, "T18_4_gblup_prediction_all_1_1000.rds")
saveRDS(T18_4_gfblup_variances_all, "T18_4_gfblup_variances_all_1_1000.rds")
saveRDS(T18_4_gfblup_prediction_all, "T18_4_gfblup_prediction_all_1_1000.rds")
saveRDS(T18_4_gfblup_validate_all, "T18_4_gfblup_validate_all_1_1000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T18_4_2001_3000.R b/code/using_GO/pla/T18_4_2001_3000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..c6212a6b706873501508c259ee6ece5b4261df83
# --- /dev/null
# +++ b/code/using_GO/pla/T18_4_2001_3000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# ---------------------------------------------------------------------------
# Trait T18_4, GO terms 2001-3000.
# For each of `cycles` repetitions this script fits
#   * GBLUP : greml() with the single relationship matrix G, and
#   * GFBLUP: greml() once per GO term, with that term's GF matrix and its
#             rGF counterpart as the two GRMs,
# each both without and with the `validate` fold matrix, and saves the
# collected results as .rds files in the working directory.
# ---------------------------------------------------------------------------
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# NOTE(review): presumably defines `G`, which is used as the GBLUP GRM below -- confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
# NOTE(review): presumably defines `pheno_df_pla`, which is read below -- confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Keep only GO terms that have at least one marker, then restrict this job to
# its slice of the filtered list.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[2001:3000]
rGF_filtered <- rGF_filtered[2001:3000]

###########################################################################################
cycles <- 10
# Fold assignments are not re-sampled here: they are read from the file written
# by the GBLUP run, one entry per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T18_4_gblup_variances_all <- rep(list(list()), cycles)
T18_4_gblup_prediction_all <- rep(list(list()), cycles)
T18_4_gfblup_variances_all <- rep(list(list()), cycles)
T18_4_gfblup_prediction_all <- rep(list(list()), cycles)
T18_4_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Rebuild this cycle's validation matrix from the stored GBLUP folds.
  # The previous per-cycle set.seed()/sample() fold generation was dead code
  # (its result was never used) and has been dropped.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message ("T18_4...")
  y <- pheno_df_pla$T18_4
  # NOTE(review): `y ~ 1*y` puts the response on the right-hand side; presumably
  # an intercept-only design (`y ~ 1`) is intended -- confirm before changing.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The handler RETURNS the fallback value: assignments made inside a handler
  # function only modify copies local to that function.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T18_4_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T18_4_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # Both fits succeed or both entries become empty lists, so var[[k]] and
    # pred[[k]] never disagree about whether GO term k could be fitted.
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " (", names(GF_filtered)[k], "): ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    var[[k]] <- fit_k[["var"]]
    pred[[k]] <- fit_k[["pred"]]
  }
  T18_4_gfblup_variances_all[[r]] <- var
  T18_4_gfblup_prediction_all[[r]] <- pred
  T18_4_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T18_4_gblup_variances_all, "T18_4_gblup_variances_all_2001_3000.rds")
saveRDS(T18_4_gblup_prediction_all, "T18_4_gblup_prediction_all_2001_3000.rds")
saveRDS(T18_4_gfblup_variances_all, "T18_4_gfblup_variances_all_2001_3000.rds")
saveRDS(T18_4_gfblup_prediction_all, "T18_4_gfblup_prediction_all_2001_3000.rds")
saveRDS(T18_4_gfblup_validate_all, "T18_4_gfblup_validate_all_2001_3000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T18_4_3001_4000.R b/code/using_GO/pla/T18_4_3001_4000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..33598de71b91621e62d8c1e7197d10e20702ac3e
# --- /dev/null
# +++ b/code/using_GO/pla/T18_4_3001_4000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# ---------------------------------------------------------------------------
# Trait T18_4, GO terms 3001-4000.
# For each of `cycles` repetitions this script fits
#   * GBLUP : greml() with the single relationship matrix G, and
#   * GFBLUP: greml() once per GO term, with that term's GF matrix and its
#             rGF counterpart as the two GRMs,
# each both without and with the `validate` fold matrix.
# ---------------------------------------------------------------------------
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# NOTE(review): presumably defines `G`, which is used as the GBLUP GRM below -- confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
# NOTE(review): presumably defines `pheno_df_pla`, which is read below -- confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Keep only GO terms that have at least one marker, then restrict this job to
# its slice of the filtered list.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]

###########################################################################################
cycles <- 10
# Fold assignments are not re-sampled here: they are read from the file written
# by the GBLUP run, one entry per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T18_4_gblup_variances_all <- rep(list(list()), cycles)
T18_4_gblup_prediction_all <- rep(list(list()), cycles)
T18_4_gfblup_variances_all <- rep(list(list()), cycles)
T18_4_gfblup_prediction_all <- rep(list(list()), cycles)
T18_4_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Rebuild this cycle's validation matrix from the stored GBLUP folds.
  # The previous per-cycle set.seed()/sample() fold generation was dead code
  # (its result was never used) and has been dropped.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message ("T18_4...")
  y <- pheno_df_pla$T18_4
  # NOTE(review): `y ~ 1*y` puts the response on the right-hand side; presumably
  # an intercept-only design (`y ~ 1`) is intended -- confirm before changing.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The handler RETURNS the fallback value: assignments made inside a handler
  # function only modify copies local to that function.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T18_4_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T18_4_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # Both fits succeed or both entries become empty lists, so var[[k]] and
    # pred[[k]] never disagree about whether GO term k could be fitted.
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " (", names(GF_filtered)[k], "): ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    var[[k]] <- fit_k[["var"]]
    pred[[k]] <- fit_k[["pred"]]
  }
  T18_4_gfblup_variances_all[[r]] <- var
  T18_4_gfblup_prediction_all[[r]] <- pred
  T18_4_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
# Persist the per-cycle results of the GO terms 3001-4000 job.
saveRDS(T18_4_gblup_variances_all, "T18_4_gblup_variances_all_3001_4000.rds")
saveRDS(T18_4_gblup_prediction_all, "T18_4_gblup_prediction_all_3001_4000.rds")
saveRDS(T18_4_gfblup_variances_all, "T18_4_gfblup_variances_all_3001_4000.rds")
saveRDS(T18_4_gfblup_prediction_all, "T18_4_gfblup_prediction_all_3001_4000.rds")
saveRDS(T18_4_gfblup_validate_all, "T18_4_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T18_4_4001_5000.R b/code/using_GO/pla/T18_4_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..1dbce5c463b1e63407e0a24364df883503ac4450
# --- /dev/null
# +++ b/code/using_GO/pla/T18_4_4001_5000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# ---------------------------------------------------------------------------
# Trait T18_4, GO terms 4001-5000.
# For each of `cycles` repetitions this script fits
#   * GBLUP : greml() with the single relationship matrix G, and
#   * GFBLUP: greml() once per GO term, with that term's GF matrix and its
#             rGF counterpart as the two GRMs,
# each both without and with the `validate` fold matrix, and saves the
# collected results as .rds files in the working directory.
# ---------------------------------------------------------------------------
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# NOTE(review): presumably defines `G`, which is used as the GBLUP GRM below -- confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
# NOTE(review): presumably defines `pheno_df_pla`, which is read below -- confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
#filter
# Keep only GO terms that have at least one marker, then restrict this job to
# its slice of the filtered list.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]

###########################################################################################
cycles <- 10
# Fold assignments are not re-sampled here: they are read from the file written
# by the GBLUP run, one entry per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T18_4_gblup_variances_all <- rep(list(list()), cycles)
T18_4_gblup_prediction_all <- rep(list(list()), cycles)
T18_4_gfblup_variances_all <- rep(list(list()), cycles)
T18_4_gfblup_prediction_all <- rep(list(list()), cycles)
T18_4_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Rebuild this cycle's validation matrix from the stored GBLUP folds.
  # The previous per-cycle set.seed()/sample() fold generation was dead code
  # (its result was never used) and has been dropped.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message ("T18_4...")
  y <- pheno_df_pla$T18_4
  # NOTE(review): `y ~ 1*y` puts the response on the right-hand side; presumably
  # an intercept-only design (`y ~ 1`) is intended -- confirm before changing.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The handler RETURNS the fallback value: assignments made inside a handler
  # function only modify copies local to that function.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T18_4_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T18_4_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # Both fits succeed or both entries become empty lists, so var[[k]] and
    # pred[[k]] never disagree about whether GO term k could be fitted.
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " (", names(GF_filtered)[k], "): ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    var[[k]] <- fit_k[["var"]]
    pred[[k]] <- fit_k[["pred"]]
  }
  T18_4_gfblup_variances_all[[r]] <- var
  T18_4_gfblup_prediction_all[[r]] <- pred
  T18_4_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T18_4_gblup_variances_all, "T18_4_gblup_variances_all_4001_5000.rds")
saveRDS(T18_4_gblup_prediction_all, "T18_4_gblup_prediction_all_4001_5000.rds")
saveRDS(T18_4_gfblup_variances_all, "T18_4_gfblup_variances_all_4001_5000.rds")
saveRDS(T18_4_gfblup_prediction_all, "T18_4_gfblup_prediction_all_4001_5000.rds")
saveRDS(T18_4_gfblup_validate_all, "T18_4_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T18_4_5001_6000.R b/code/using_GO/pla/T18_4_5001_6000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..ec1a8a0af02f27f8631d2738ad973f33ecbd60d7
# --- /dev/null
# +++ b/code/using_GO/pla/T18_4_5001_6000.R
# @@ -0,0 +1,131 @@
+############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + 
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_4_gblup_variances_all=rep(list(list()),cycles) +T18_4_gblup_prediction_all=rep(list(list()),cycles) +T18_4_gfblup_variances_all=rep(list(list()),cycles) +T18_4_gfblup_prediction_all=rep(list(list()),cycles) +T18_4_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_4...") + y=pheno_df_pla$T18_4 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_4_gblup_variances_all[[r]]<-var + T18_4_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_4_gblup_variances_all[[r]]<-list() + T18_4_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_4_gfblup_variances_all[[r]]<-var + T18_4_gfblup_prediction_all[[r]]<-pred + T18_4_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_4_gblup_variances_all,"T18_4_gblup_variances_all_5001_6000.rds") +saveRDS(T18_4_gblup_prediction_all,"T18_4_gblup_prediction_all_5001_6000.rds") +saveRDS(T18_4_gfblup_variances_all,"T18_4_gfblup_variances_all_5001_6000.rds") +saveRDS(T18_4_gfblup_prediction_all,"T18_4_gfblup_prediction_all_5001_6000.rds") +saveRDS(T18_4_gfblup_validate_all,"T18_4_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_4_6001_7297.R b/code/using_GO/pla/T18_4_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..3827199cafeb0c927dbd19d30028cec227f205fd --- /dev/null +++ b/code/using_GO/pla/T18_4_6001_7297.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# 
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
#7297 excluding those go terms with zero markers
# ---- Remaining GO-derived inputs ---------------------------------------------
# Per the original annotations, each of these objects covers the 7297 GO terms
# that have at least one marker.
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Select the GO terms handled by this job ---------------------------------
# Keep only GO terms with at least one marker, then take this job's slice of
# the feature (GF) and remainder (rGF) relationship matrices.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
go_slice <- 6001:7297
GF_filtered <- GF[names(go_all_markers_filtered)][go_slice]
rGF_filtered <- rGF[names(go_all_markers_filtered)][go_slice]

# ---- Cross-validation setup --------------------------------------------------
cycles <- 10
# Fold assignments are read from the earlier GBLUP run rather than re-sampled
# here, so no random numbers are drawn in this script.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) result slot per cycle.
empty_per_cycle <- rep(list(list()), cycles)
T18_4_gblup_variances_all <- empty_per_cycle
T18_4_gblup_prediction_all <- empty_per_cycle
T18_4_gfblup_variances_all <- empty_per_cycle
T18_4_gfblup_prediction_all <- empty_per_cycle
T18_4_gfblup_validate_all <- empty_per_cycle

# Run greml() twice for one set of relationship matrices: once without and
# once with the validation sets.
#
# y, X      phenotype vector and fixed-effect design matrix passed to greml()
# GRM       named list of relationship matrices passed to greml()
# validate  validation-set matrix passed to the second greml() call
# label     text used to identify the model in the failure message
#
# Returns list(var = <fit without validate>, pred = <fit with validate>).
# If either call fails, the error is reported via message() and BOTH entries
# are empty lists, so the caller always gets a matched pair.
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    {
      list(
        var = greml(y = y, X = X, GRM = GRM, ncores = 1),
        pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
      )
    },
    error = function(e) {
      # The handler returns the fallback value; assigning to the caller's
      # variables from inside this function would only change local copies.
      message("greml failed for ", label, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))

  # Rebuild this cycle's fold matrix from the stored GBLUP folds.
  # NOTE(review): unlist() on a matrix flattens column-wise while byrow = TRUE
  # refills row-wise; if the GBLUP run stored the matrix the way this script
  # does (list(validate)), confirm the resulting columns are the intended folds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message("T18_4...")
  y <- pheno_df_pla$T18_4
  # Intercept-only design matrix. (The response must not appear on the
  # right-hand side of the formula; model.matrix() drops it with a warning.)
  X <- model.matrix(y ~ 1)

  # ---- GBLUP: single genomic relationship matrix -----------------------------
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate, "GBLUP")
  T18_4_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T18_4_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  # ---- GFBLUP: one model per GO term (feature + remainder matrices) ----------
  go_variances <- rep(list(list()), length(GF_filtered))
  go_predictions <- rep(list(list()), length(GF_filtered))
  names(go_variances) <- names(GF_filtered)
  names(go_predictions) <- names(GF_filtered)

  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    go_fit <- fit_greml_pair(
      y, X,
      c(GF_filtered[k], rGF_filtered[k]),
      validate,
      paste0("GO#", k, " (cycle ", r, ")")
    )
    go_variances[[k]] <- go_fit[["var"]]
    go_predictions[[k]] <- go_fit[["pred"]]
  }

  T18_4_gfblup_variances_all[[r]] <- go_variances
  T18_4_gfblup_prediction_all[[r]] <- go_predictions
  T18_4_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Persist results (written to the current working directory) --------------
saveRDS(T18_4_gblup_variances_all, "T18_4_gblup_variances_all_6001_7297.rds")
saveRDS(T18_4_gblup_prediction_all, "T18_4_gblup_prediction_all_6001_7297.rds")
saveRDS(T18_4_gfblup_variances_all, "T18_4_gfblup_variances_all_6001_7297.rds")
saveRDS(T18_4_gfblup_prediction_all, "T18_4_gfblup_prediction_all_6001_7297.rds")
+saveRDS(T18_4_gfblup_validate_all,"T18_4_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_5_1001_2000.R b/code/using_GO/pla/T18_5_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..26fcb78f1fef98e5f82b7272b0e49d9ebfe86ef0 --- /dev/null +++ b/code/using_GO/pla/T18_5_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# ---- Genotype and phenotype inputs -------------------------------------------
# load() restores whatever objects each .Rdata file contains into the current
# environment, so the statements are kept in their original order.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO term inputs ----------------------------------------------------------
# Original annotation: the next three objects cover 7432 GO terms.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# Original annotation: the remaining objects cover 7297 GO terms, i.e. they
# exclude GO terms with zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
+######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_5_gblup_variances_all=rep(list(list()),cycles) +T18_5_gblup_prediction_all=rep(list(list()),cycles) +T18_5_gfblup_variances_all=rep(list(list()),cycles) +T18_5_gfblup_prediction_all=rep(list(list()),cycles) +T18_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_5...") + y=pheno_df_pla$T18_5 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_5_gblup_variances_all[[r]]<-var + T18_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
T18_5_gblup_variances_all[[r]]<-list() + T18_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_5_gfblup_variances_all[[r]]<-var + T18_5_gfblup_prediction_all[[r]]<-pred + T18_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_5_gblup_variances_all,"T18_5_gblup_variances_all_1001_2000.rds") +saveRDS(T18_5_gblup_prediction_all,"T18_5_gblup_prediction_all_1001_2000.rds") +saveRDS(T18_5_gfblup_variances_all,"T18_5_gfblup_variances_all_1001_2000.rds") +saveRDS(T18_5_gfblup_prediction_all,"T18_5_gfblup_prediction_all_1001_2000.rds") +saveRDS(T18_5_gfblup_validate_all,"T18_5_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_5_1_1000.R b/code/using_GO/pla/T18_5_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..0a360771122e65b70abe82699e5aed382afb62a0 --- /dev/null +++ b/code/using_GO/pla/T18_5_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# 
Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_5_gblup_variances_all=rep(list(list()),cycles) 
+T18_5_gblup_prediction_all=rep(list(list()),cycles) +T18_5_gfblup_variances_all=rep(list(list()),cycles) +T18_5_gfblup_prediction_all=rep(list(list()),cycles) +T18_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_5...") + y=pheno_df_pla$T18_5 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_5_gblup_variances_all[[r]]<-var + T18_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_5_gblup_variances_all[[r]]<-list() + T18_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends 
+ } + T18_5_gfblup_variances_all[[r]]<-var + T18_5_gfblup_prediction_all[[r]]<-pred + T18_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_5_gblup_variances_all,"T18_5_gblup_variances_all_1_1000.rds") +saveRDS(T18_5_gblup_prediction_all,"T18_5_gblup_prediction_all_1_1000.rds") +saveRDS(T18_5_gfblup_variances_all,"T18_5_gfblup_variances_all_1_1000.rds") +saveRDS(T18_5_gfblup_prediction_all,"T18_5_gfblup_prediction_all_1_1000.rds") +saveRDS(T18_5_gfblup_validate_all,"T18_5_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_5_2001_3000.R b/code/using_GO/pla/T18_5_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..29dead93539296308de441985fdb331c4a9880a5 --- /dev/null +++ b/code/using_GO/pla/T18_5_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') 
# ==============================================================================
# Input data
# ==============================================================================
message("Loading data...")

# ---- Genotype and phenotype inputs -------------------------------------------
# load() restores whatever objects each .Rdata file contains into the current
# environment, so the statements are kept in their original order.
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO term inputs ----------------------------------------------------------
# Original annotation: the next three objects cover 7432 GO terms.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# Original annotation: the remaining objects cover 7297 GO terms, i.e. they
# exclude GO terms with zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_5_gblup_variances_all=rep(list(list()),cycles) +T18_5_gblup_prediction_all=rep(list(list()),cycles) +T18_5_gfblup_variances_all=rep(list(list()),cycles) +T18_5_gfblup_prediction_all=rep(list(list()),cycles) +T18_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_5...") + y=pheno_df_pla$T18_5 + fm <- y ~ 1*y + 
X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_5_gblup_variances_all[[r]]<-var + T18_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_5_gblup_variances_all[[r]]<-list() + T18_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_5_gfblup_variances_all[[r]]<-var + T18_5_gfblup_prediction_all[[r]]<-pred + T18_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_5_gblup_variances_all,"T18_5_gblup_variances_all_2001_3000.rds") +saveRDS(T18_5_gblup_prediction_all,"T18_5_gblup_prediction_all_2001_3000.rds") +saveRDS(T18_5_gfblup_variances_all,"T18_5_gfblup_variances_all_2001_3000.rds") +saveRDS(T18_5_gfblup_prediction_all,"T18_5_gfblup_prediction_all_2001_3000.rds") +saveRDS(T18_5_gfblup_validate_all,"T18_5_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_5_3001_4000.R 
b/code/using_GO/pla/T18_5_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..4b7e1ad297bb1ea4412cf903aacc949a5d7bbcb5 --- /dev/null +++ b/code/using_GO/pla/T18_5_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
# ---- Phenotype inputs --------------------------------------------------------
# load() restores whatever objects each .Rdata file contains into the current
# environment, so the statements are kept in their original order.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO term inputs ----------------------------------------------------------
# Original annotation: the next three objects cover 7432 GO terms.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# Original annotation: the remaining objects cover 7297 GO terms, i.e. they
# exclude GO terms with zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Select the GO terms handled by this job ---------------------------------
# Keep only GO terms with at least one marker, look up their entries in GF and
# rGF by name, then take positions 3001-4000 of each.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
kept_go_ids <- names(go_all_markers_filtered)
GF_filtered <- GF[kept_go_ids]
rGF_filtered <- rGF[kept_go_ids]
GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]

+########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_5_gblup_variances_all=rep(list(list()),cycles) +T18_5_gblup_prediction_all=rep(list(list()),cycles) +T18_5_gfblup_variances_all=rep(list(list()),cycles) +T18_5_gfblup_prediction_all=rep(list(list()),cycles) +T18_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_5...") + y=pheno_df_pla$T18_5 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_5_gblup_variances_all[[r]]<-var + T18_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_5_gblup_variances_all[[r]]<-list() + T18_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { 
+ message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_5_gfblup_variances_all[[r]]<-var + T18_5_gfblup_prediction_all[[r]]<-pred + T18_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_5_gblup_variances_all,"T18_5_gblup_variances_all_3001_4000.rds") +saveRDS(T18_5_gblup_prediction_all,"T18_5_gblup_prediction_all_3001_4000.rds") +saveRDS(T18_5_gfblup_variances_all,"T18_5_gfblup_variances_all_3001_4000.rds") +saveRDS(T18_5_gfblup_prediction_all,"T18_5_gfblup_prediction_all_3001_4000.rds") +saveRDS(T18_5_gfblup_validate_all,"T18_5_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_5_4001_5000.R b/code/using_GO/pla/T18_5_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..97e3b412ea2446d1262abdd9fb13e8678fd78bbe --- /dev/null +++ b/code/using_GO/pla/T18_5_4001_5000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if 
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers 
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_5_gblup_variances_all=rep(list(list()),cycles) +T18_5_gblup_prediction_all=rep(list(list()),cycles) +T18_5_gfblup_variances_all=rep(list(list()),cycles) +T18_5_gfblup_prediction_all=rep(list(list()),cycles) +T18_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = 
nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_5...") + y=pheno_df_pla$T18_5 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_5_gblup_variances_all[[r]]<-var + T18_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_5_gblup_variances_all[[r]]<-list() + T18_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_5_gfblup_variances_all[[r]]<-var + T18_5_gfblup_prediction_all[[r]]<-pred + T18_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_5_gblup_variances_all,"T18_5_gblup_variances_all_4001_5000.rds") +saveRDS(T18_5_gblup_prediction_all,"T18_5_gblup_prediction_all_4001_5000.rds") 
+saveRDS(T18_5_gfblup_variances_all,"T18_5_gfblup_variances_all_4001_5000.rds") +saveRDS(T18_5_gfblup_prediction_all,"T18_5_gfblup_prediction_all_4001_5000.rds") +saveRDS(T18_5_gfblup_validate_all,"T18_5_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_5_5001_6000.R b/code/using_GO/pla/T18_5_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..7983d5c88cf422b592513a162cb80e79bf464b6b --- /dev/null +++ b/code/using_GO/pla/T18_5_5001_6000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") 
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") 
#7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_5_gblup_variances_all=rep(list(list()),cycles) +T18_5_gblup_prediction_all=rep(list(list()),cycles) +T18_5_gfblup_variances_all=rep(list(list()),cycles) +T18_5_gfblup_prediction_all=rep(list(list()),cycles) +T18_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_5...") + y=pheno_df_pla$T18_5 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_5_gblup_variances_all[[r]]<-var + 
T18_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_5_gblup_variances_all[[r]]<-list() + T18_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_5_gfblup_variances_all[[r]]<-var + T18_5_gfblup_prediction_all[[r]]<-pred + T18_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_5_gblup_variances_all,"T18_5_gblup_variances_all_5001_6000.rds") +saveRDS(T18_5_gblup_prediction_all,"T18_5_gblup_prediction_all_5001_6000.rds") +saveRDS(T18_5_gfblup_variances_all,"T18_5_gfblup_variances_all_5001_6000.rds") +saveRDS(T18_5_gfblup_prediction_all,"T18_5_gfblup_prediction_all_5001_6000.rds") +saveRDS(T18_5_gfblup_validate_all,"T18_5_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_5_6001_7297.R b/code/using_GO/pla/T18_5_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..e08624495c4ff93d23df8920040fc46563b8da6b --- /dev/null +++ b/code/using_GO/pla/T18_5_6001_7297.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- 
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") 
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[6001:7297] +rGF_filtered<-rGF_filtered[6001:7297] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + 
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_5_gblup_variances_all=rep(list(list()),cycles) +T18_5_gblup_prediction_all=rep(list(list()),cycles) +T18_5_gfblup_variances_all=rep(list(list()),cycles) +T18_5_gfblup_prediction_all=rep(list(list()),cycles) +T18_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_5...") + y=pheno_df_pla$T18_5 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_5_gblup_variances_all[[r]]<-var + T18_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_5_gblup_variances_all[[r]]<-list() + T18_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_5_gfblup_variances_all[[r]]<-var + T18_5_gfblup_prediction_all[[r]]<-pred + T18_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_5_gblup_variances_all,"T18_5_gblup_variances_all_6001_7297.rds") +saveRDS(T18_5_gblup_prediction_all,"T18_5_gblup_prediction_all_6001_7297.rds") +saveRDS(T18_5_gfblup_variances_all,"T18_5_gfblup_variances_all_6001_7297.rds") +saveRDS(T18_5_gfblup_prediction_all,"T18_5_gfblup_prediction_all_6001_7297.rds") +saveRDS(T18_5_gfblup_validate_all,"T18_5_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_6_1001_2000.R b/code/using_GO/pla/T18_6_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..19a770a612880401f4288c0b57637f84dc2d5c4f --- /dev/null +++ b/code/using_GO/pla/T18_6_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# 
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
#7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_6_gblup_variances_all=rep(list(list()),cycles) +T18_6_gblup_prediction_all=rep(list(list()),cycles) +T18_6_gfblup_variances_all=rep(list(list()),cycles) +T18_6_gfblup_prediction_all=rep(list(list()),cycles) +T18_6_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + 
#validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_6...") + y=pheno_df_pla$T18_6 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_6_gblup_variances_all[[r]]<-var + T18_6_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_6_gblup_variances_all[[r]]<-list() + T18_6_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_6_gfblup_variances_all[[r]]<-var + T18_6_gfblup_prediction_all[[r]]<-pred + T18_6_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_6_gblup_variances_all,"T18_6_gblup_variances_all_1001_2000.rds") +saveRDS(T18_6_gblup_prediction_all,"T18_6_gblup_prediction_all_1001_2000.rds") +saveRDS(T18_6_gfblup_variances_all,"T18_6_gfblup_variances_all_1001_2000.rds") +saveRDS(T18_6_gfblup_prediction_all,"T18_6_gfblup_prediction_all_1001_2000.rds") 
+saveRDS(T18_6_gfblup_validate_all,"T18_6_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_6_1_1000.R b/code/using_GO/pla/T18_6_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..5793c29bbe8ce2ef853350554c1161f7327d52aa --- /dev/null +++ b/code/using_GO/pla/T18_6_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") 
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) 
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_6_gblup_variances_all=rep(list(list()),cycles) +T18_6_gblup_prediction_all=rep(list(list()),cycles) +T18_6_gfblup_variances_all=rep(list(list()),cycles) +T18_6_gfblup_prediction_all=rep(list(list()),cycles) +T18_6_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_6...") + y=pheno_df_pla$T18_6 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_6_gblup_variances_all[[r]]<-var + T18_6_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_6_gblup_variances_all[[r]]<-list() + T18_6_gblup_prediction_all[[r]]<-list() + }) #try catch ends + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_6_gfblup_variances_all[[r]]<-var + T18_6_gfblup_prediction_all[[r]]<-pred + T18_6_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_6_gblup_variances_all,"T18_6_gblup_variances_all_1_1000.rds") +saveRDS(T18_6_gblup_prediction_all,"T18_6_gblup_prediction_all_1_1000.rds") +saveRDS(T18_6_gfblup_variances_all,"T18_6_gfblup_variances_all_1_1000.rds") +saveRDS(T18_6_gfblup_prediction_all,"T18_6_gfblup_prediction_all_1_1000.rds") +saveRDS(T18_6_gfblup_validate_all,"T18_6_gfblup_validate_all_1_1000.rds") + +##################################################################################################################################
diff --git a/code/using_GO/pla/T18_6_2001_3000.R b/code/using_GO/pla/T18_6_2001_3000.R
new file mode 100644
index 0000000000000000000000000000000000000000..dc17a9ec4e97d0e735db69eff0397cfdd8001412
--- /dev/null
+++ b/code/using_GO/pla/T18_6_2001_3000.R
@@ -0,0 +1,116 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+# Evaluate `fit`; if it raises an error, report the message and return NULL.
+# (Assignments made inside an error handler only change the handler's local copies,
+# so the outcome has to be passed back as the return value.)
+try_fit <- function(label, fit) {
+  tryCatch(fit, error = function(e) {
+    message(label, " failed: ", conditionMessage(e))
+    NULL
+  })
+}
+########################################################################################
+# Chunk settings: only go_range differs between the sibling chunk scripts of this trait.
+trait <- "T18_6"       # column of pheno_df_pla used as the phenotype
+go_range <- 2001:3000  # positions, after filtering, of the GO terms fitted here
+cycles <- 10
+n_folds <- 8
+data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
+go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
+
+# Results are written into this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+# qgg is installed from GitHub: devtools::install_github("psoerensen/qgg")
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+# The .rds objects that were read but never used are no longer loaded. Every .Rdata
+# file is still loaded because its contents are not visible from this script
+# (G and pheno_df_pla are expected to come from them).
+load(file=file.path(data_dir, "G.Rdata"))
+load(file=file.path(data_dir, "Pheno_pla.Rdata"))
+load(file=file.path(data_dir, "geno_pheno_pla.Rdata"))
+load(file=file.path(data_dir, "pheno_df_pla.Rdata"))
+go_all_markers_number <- readRDS(file=file.path(go_dir, "go_all_markers_number.rds")) #7432
+go_all_markers <- readRDS(file=file.path(go_dir, "go_all_markers.rds")) #7432
+GF <- readRDS(file=file.path(go_dir, "GF.rds")) #7297 excluding those go terms with zero markers
+rGF <- readRDS(file=file.path(go_dir, "rGF.rds")) #7297 excluding those go terms with zero markers
+# Fold assignments saved by the GBLUP run; reused instead of drawing new folds.
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+########################################################################################
+#filter: keep GO terms that have at least one marker, then take this chunk's slice
+go_all_markers_filtered <- go_all_markers[go_all_markers_number>0]
+GF_filtered <- GF[names(go_all_markers_filtered)][go_range]
+rGF_filtered <- rGF[names(go_all_markers_filtered)][go_range]
+
+###########################################################################################
+gblup_variances_all <- rep(list(list()), cycles)
+gblup_prediction_all <- rep(list(list()), cycles)
+gfblup_variances_all <- rep(list(list()), cycles)
+gfblup_prediction_all <- rep(list(list()), cycles)
+gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and design matrix are the same in every cycle, so build them once.
+y <- pheno_df_pla[[trait]]
+# NOTE(review): formula kept as written; an intercept-only model (y ~ 1) was presumably meant - confirm.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # The unused time-seeded fold sampling was dropped; folds come from the saved GBLUP run.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message(trait, "...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gblup <- try_fit("GBLUP", list(
+    var = greml(y = y, X = X, GRM = list(G=G), ncores=1),
+    pred = greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1)
+  ))
+  if (!is.null(gblup)) {
+    gblup_variances_all[[r]] <- gblup[["var"]]
+    gblup_prediction_all[[r]] <- gblup[["pred"]]
+  }
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # A GO term is stored only when both fits succeed; otherwise both entries stay list().
+  go_variances <- rep(list(list()), length(GF_filtered))
+  go_predictions <- rep(list(list()), length(GF_filtered))
+  names(go_variances) <- names(GF_filtered)
+  names(go_predictions) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grm <- c(GF_filtered[k], rGF_filtered[k])
+    gfblup <- try_fit(paste("GFBLUP GO#", k), list(
+      var = greml(y = y, X = X, GRM = grm, ncores=1),
+      pred = greml(y = y, X = X, GRM = grm, validate = validate, ncores=1)
+    ))
+    if (!is.null(gfblup)) {
+      go_variances[[k]] <- gfblup[["var"]]
+      go_predictions[[k]] <- gfblup[["pred"]]
+    }
+  }
+  gfblup_variances_all[[r]] <- go_variances
+  gfblup_prediction_all[[r]] <- go_predictions
+  gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+# Output file names are unchanged: <trait>_<result>_all_<first>_<last>.rds
+suffix <- paste0("_", min(go_range), "_", max(go_range), ".rds")
+saveRDS(gblup_variances_all, paste0(trait, "_gblup_variances_all", suffix))
+saveRDS(gblup_prediction_all, paste0(trait, "_gblup_prediction_all", suffix))
+saveRDS(gfblup_variances_all, paste0(trait, "_gfblup_variances_all", suffix))
+saveRDS(gfblup_prediction_all, paste0(trait, "_gfblup_prediction_all", suffix))
+saveRDS(gfblup_validate_all, paste0(trait, "_gfblup_validate_all", suffix))
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T18_6_3001_4000.R b/code/using_GO/pla/T18_6_3001_4000.R
new file mode 100644
index 0000000000000000000000000000000000000000..7c305d444e635ebb2cff071153ecddf0a266056d
--- /dev/null
+++ b/code/using_GO/pla/T18_6_3001_4000.R
@@ -0,0 +1,116 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+# Evaluate `fit`; if it raises an error, report the message and return NULL.
+# (Assignments made inside an error handler only change the handler's local copies,
+# so the outcome has to be passed back as the return value.)
+try_fit <- function(label, fit) {
+  tryCatch(fit, error = function(e) {
+    message(label, " failed: ", conditionMessage(e))
+    NULL
+  })
+}
+########################################################################################
+# Chunk settings: only go_range differs between the sibling chunk scripts of this trait.
+trait <- "T18_6"       # column of pheno_df_pla used as the phenotype
+go_range <- 3001:4000  # positions, after filtering, of the GO terms fitted here
+cycles <- 10
+n_folds <- 8
+data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
+go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
+
+# Results are written into this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+# qgg is installed from GitHub: devtools::install_github("psoerensen/qgg")
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+# The .rds objects that were read but never used are no longer loaded. Every .Rdata
+# file is still loaded because its contents are not visible from this script
+# (G and pheno_df_pla are expected to come from them).
+load(file=file.path(data_dir, "G.Rdata"))
+load(file=file.path(data_dir, "Pheno_pla.Rdata"))
+load(file=file.path(data_dir, "geno_pheno_pla.Rdata"))
+load(file=file.path(data_dir, "pheno_df_pla.Rdata"))
+go_all_markers_number <- readRDS(file=file.path(go_dir, "go_all_markers_number.rds")) #7432
+go_all_markers <- readRDS(file=file.path(go_dir, "go_all_markers.rds")) #7432
+GF <- readRDS(file=file.path(go_dir, "GF.rds")) #7297 excluding those go terms with zero markers
+rGF <- readRDS(file=file.path(go_dir, "rGF.rds")) #7297 excluding those go terms with zero markers
+# Fold assignments saved by the GBLUP run; reused instead of drawing new folds.
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+########################################################################################
+#filter: keep GO terms that have at least one marker, then take this chunk's slice
+go_all_markers_filtered <- go_all_markers[go_all_markers_number>0]
+GF_filtered <- GF[names(go_all_markers_filtered)][go_range]
+rGF_filtered <- rGF[names(go_all_markers_filtered)][go_range]
+
+###########################################################################################
+gblup_variances_all <- rep(list(list()), cycles)
+gblup_prediction_all <- rep(list(list()), cycles)
+gfblup_variances_all <- rep(list(list()), cycles)
+gfblup_prediction_all <- rep(list(list()), cycles)
+gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and design matrix are the same in every cycle, so build them once.
+y <- pheno_df_pla[[trait]]
+# NOTE(review): formula kept as written; an intercept-only model (y ~ 1) was presumably meant - confirm.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # The unused time-seeded fold sampling was dropped; folds come from the saved GBLUP run.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message(trait, "...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gblup <- try_fit("GBLUP", list(
+    var = greml(y = y, X = X, GRM = list(G=G), ncores=1),
+    pred = greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1)
+  ))
+  if (!is.null(gblup)) {
+    gblup_variances_all[[r]] <- gblup[["var"]]
+    gblup_prediction_all[[r]] <- gblup[["pred"]]
+  }
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # A GO term is stored only when both fits succeed; otherwise both entries stay list().
+  go_variances <- rep(list(list()), length(GF_filtered))
+  go_predictions <- rep(list(list()), length(GF_filtered))
+  names(go_variances) <- names(GF_filtered)
+  names(go_predictions) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grm <- c(GF_filtered[k], rGF_filtered[k])
+    gfblup <- try_fit(paste("GFBLUP GO#", k), list(
+      var = greml(y = y, X = X, GRM = grm, ncores=1),
+      pred = greml(y = y, X = X, GRM = grm, validate = validate, ncores=1)
+    ))
+    if (!is.null(gfblup)) {
+      go_variances[[k]] <- gfblup[["var"]]
+      go_predictions[[k]] <- gfblup[["pred"]]
+    }
+  }
+  gfblup_variances_all[[r]] <- go_variances
+  gfblup_prediction_all[[r]] <- go_predictions
+  gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+# Output file names are unchanged: <trait>_<result>_all_<first>_<last>.rds
+suffix <- paste0("_", min(go_range), "_", max(go_range), ".rds")
+saveRDS(gblup_variances_all, paste0(trait, "_gblup_variances_all", suffix))
+saveRDS(gblup_prediction_all, paste0(trait, "_gblup_prediction_all", suffix))
+saveRDS(gfblup_variances_all, paste0(trait, "_gfblup_variances_all", suffix))
+saveRDS(gfblup_prediction_all, paste0(trait, "_gfblup_prediction_all", suffix))
+saveRDS(gfblup_validate_all, paste0(trait, "_gfblup_validate_all", suffix))
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T18_6_4001_5000.R b/code/using_GO/pla/T18_6_4001_5000.R
new file mode 100644
index 0000000000000000000000000000000000000000..fd400b753f410380d5d15ebde9fa45ac7f10078b
--- /dev/null
+++ b/code/using_GO/pla/T18_6_4001_5000.R
@@ -0,0 +1,116 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+# Evaluate `fit`; if it raises an error, report the message and return NULL.
+# (Assignments made inside an error handler only change the handler's local copies,
+# so the outcome has to be passed back as the return value.)
+try_fit <- function(label, fit) {
+  tryCatch(fit, error = function(e) {
+    message(label, " failed: ", conditionMessage(e))
+    NULL
+  })
+}
+########################################################################################
+# Chunk settings: only go_range differs between the sibling chunk scripts of this trait.
+trait <- "T18_6"       # column of pheno_df_pla used as the phenotype
+go_range <- 4001:5000  # positions, after filtering, of the GO terms fitted here
+cycles <- 10
+n_folds <- 8
+data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
+go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
+
+# Results are written into this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+# qgg is installed from GitHub: devtools::install_github("psoerensen/qgg")
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+# The .rds objects that were read but never used are no longer loaded. Every .Rdata
+# file is still loaded because its contents are not visible from this script
+# (G and pheno_df_pla are expected to come from them).
+load(file=file.path(data_dir, "G.Rdata"))
+load(file=file.path(data_dir, "Pheno_pla.Rdata"))
+load(file=file.path(data_dir, "geno_pheno_pla.Rdata"))
+load(file=file.path(data_dir, "pheno_df_pla.Rdata"))
+go_all_markers_number <- readRDS(file=file.path(go_dir, "go_all_markers_number.rds")) #7432
+go_all_markers <- readRDS(file=file.path(go_dir, "go_all_markers.rds")) #7432
+GF <- readRDS(file=file.path(go_dir, "GF.rds")) #7297 excluding those go terms with zero markers
+rGF <- readRDS(file=file.path(go_dir, "rGF.rds")) #7297 excluding those go terms with zero markers
+# Fold assignments saved by the GBLUP run; reused instead of drawing new folds.
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+########################################################################################
+#filter: keep GO terms that have at least one marker, then take this chunk's slice
+go_all_markers_filtered <- go_all_markers[go_all_markers_number>0]
+GF_filtered <- GF[names(go_all_markers_filtered)][go_range]
+rGF_filtered <- rGF[names(go_all_markers_filtered)][go_range]
+
+###########################################################################################
+gblup_variances_all <- rep(list(list()), cycles)
+gblup_prediction_all <- rep(list(list()), cycles)
+gfblup_variances_all <- rep(list(list()), cycles)
+gfblup_prediction_all <- rep(list(list()), cycles)
+gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and design matrix are the same in every cycle, so build them once.
+y <- pheno_df_pla[[trait]]
+# NOTE(review): formula kept as written; an intercept-only model (y ~ 1) was presumably meant - confirm.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # The unused time-seeded fold sampling was dropped; folds come from the saved GBLUP run.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message(trait, "...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gblup <- try_fit("GBLUP", list(
+    var = greml(y = y, X = X, GRM = list(G=G), ncores=1),
+    pred = greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1)
+  ))
+  if (!is.null(gblup)) {
+    gblup_variances_all[[r]] <- gblup[["var"]]
+    gblup_prediction_all[[r]] <- gblup[["pred"]]
+  }
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # A GO term is stored only when both fits succeed; otherwise both entries stay list().
+  go_variances <- rep(list(list()), length(GF_filtered))
+  go_predictions <- rep(list(list()), length(GF_filtered))
+  names(go_variances) <- names(GF_filtered)
+  names(go_predictions) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grm <- c(GF_filtered[k], rGF_filtered[k])
+    gfblup <- try_fit(paste("GFBLUP GO#", k), list(
+      var = greml(y = y, X = X, GRM = grm, ncores=1),
+      pred = greml(y = y, X = X, GRM = grm, validate = validate, ncores=1)
+    ))
+    if (!is.null(gfblup)) {
+      go_variances[[k]] <- gfblup[["var"]]
+      go_predictions[[k]] <- gfblup[["pred"]]
+    }
+  }
+  gfblup_variances_all[[r]] <- go_variances
+  gfblup_prediction_all[[r]] <- go_predictions
+  gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+# Output file names are unchanged: <trait>_<result>_all_<first>_<last>.rds
+suffix <- paste0("_", min(go_range), "_", max(go_range), ".rds")
+saveRDS(gblup_variances_all, paste0(trait, "_gblup_variances_all", suffix))
+saveRDS(gblup_prediction_all, paste0(trait, "_gblup_prediction_all", suffix))
+saveRDS(gfblup_variances_all, paste0(trait, "_gfblup_variances_all", suffix))
+saveRDS(gfblup_prediction_all, paste0(trait, "_gfblup_prediction_all", suffix))
+saveRDS(gfblup_validate_all, paste0(trait, "_gfblup_validate_all", suffix))
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T18_6_5001_6000.R b/code/using_GO/pla/T18_6_5001_6000.R
new file mode 100644
index 0000000000000000000000000000000000000000..6016036de8e1a7e6fa298d0ce4f543c8465f49a0
--- /dev/null
+++ b/code/using_GO/pla/T18_6_5001_6000.R
@@ -0,0 +1,116 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+# Evaluate `fit`; if it raises an error, report the message and return NULL.
+# (Assignments made inside an error handler only change the handler's local copies,
+# so the outcome has to be passed back as the return value.)
+try_fit <- function(label, fit) {
+  tryCatch(fit, error = function(e) {
+    message(label, " failed: ", conditionMessage(e))
+    NULL
+  })
+}
+########################################################################################
+# Chunk settings: only go_range differs between the sibling chunk scripts of this trait.
+trait <- "T18_6"       # column of pheno_df_pla used as the phenotype
+go_range <- 5001:6000  # positions, after filtering, of the GO terms fitted here
+cycles <- 10
+n_folds <- 8
+data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
+go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
+
+# Results are written into this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+# qgg is installed from GitHub: devtools::install_github("psoerensen/qgg")
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+# The .rds objects that were read but never used are no longer loaded. Every .Rdata
+# file is still loaded because its contents are not visible from this script
+# (G and pheno_df_pla are expected to come from them).
+load(file=file.path(data_dir, "G.Rdata"))
+load(file=file.path(data_dir, "Pheno_pla.Rdata"))
+load(file=file.path(data_dir, "geno_pheno_pla.Rdata"))
+load(file=file.path(data_dir, "pheno_df_pla.Rdata"))
+go_all_markers_number <- readRDS(file=file.path(go_dir, "go_all_markers_number.rds")) #7432
+go_all_markers <- readRDS(file=file.path(go_dir, "go_all_markers.rds")) #7432
+GF <- readRDS(file=file.path(go_dir, "GF.rds")) #7297 excluding those go terms with zero markers
+rGF <- readRDS(file=file.path(go_dir, "rGF.rds")) #7297 excluding those go terms with zero markers
+# Fold assignments saved by the GBLUP run; reused instead of drawing new folds.
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+########################################################################################
+#filter: keep GO terms that have at least one marker, then take this chunk's slice
+go_all_markers_filtered <- go_all_markers[go_all_markers_number>0]
+GF_filtered <- GF[names(go_all_markers_filtered)][go_range]
+rGF_filtered <- rGF[names(go_all_markers_filtered)][go_range]
+
+###########################################################################################
+gblup_variances_all <- rep(list(list()), cycles)
+gblup_prediction_all <- rep(list(list()), cycles)
+gfblup_variances_all <- rep(list(list()), cycles)
+gfblup_prediction_all <- rep(list(list()), cycles)
+gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and design matrix are the same in every cycle, so build them once.
+y <- pheno_df_pla[[trait]]
+# NOTE(review): formula kept as written; an intercept-only model (y ~ 1) was presumably meant - confirm.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # The unused time-seeded fold sampling was dropped; folds come from the saved GBLUP run.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message(trait, "...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gblup <- try_fit("GBLUP", list(
+    var = greml(y = y, X = X, GRM = list(G=G), ncores=1),
+    pred = greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1)
+  ))
+  if (!is.null(gblup)) {
+    gblup_variances_all[[r]] <- gblup[["var"]]
+    gblup_prediction_all[[r]] <- gblup[["pred"]]
+  }
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # A GO term is stored only when both fits succeed; otherwise both entries stay list().
+  go_variances <- rep(list(list()), length(GF_filtered))
+  go_predictions <- rep(list(list()), length(GF_filtered))
+  names(go_variances) <- names(GF_filtered)
+  names(go_predictions) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grm <- c(GF_filtered[k], rGF_filtered[k])
+    gfblup <- try_fit(paste("GFBLUP GO#", k), list(
+      var = greml(y = y, X = X, GRM = grm, ncores=1),
+      pred = greml(y = y, X = X, GRM = grm, validate = validate, ncores=1)
+    ))
+    if (!is.null(gfblup)) {
+      go_variances[[k]] <- gfblup[["var"]]
+      go_predictions[[k]] <- gfblup[["pred"]]
+    }
+  }
+  gfblup_variances_all[[r]] <- go_variances
+  gfblup_prediction_all[[r]] <- go_predictions
+  gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+# Output file names are unchanged: <trait>_<result>_all_<first>_<last>.rds
+suffix <- paste0("_", min(go_range), "_", max(go_range), ".rds")
+saveRDS(gblup_variances_all, paste0(trait, "_gblup_variances_all", suffix))
+saveRDS(gblup_prediction_all, paste0(trait, "_gblup_prediction_all", suffix))
+saveRDS(gfblup_variances_all, paste0(trait, "_gfblup_variances_all", suffix))
+saveRDS(gfblup_prediction_all, paste0(trait, "_gfblup_prediction_all", suffix))
+saveRDS(gfblup_validate_all, paste0(trait, "_gfblup_validate_all", suffix))
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T18_6_6001_7297.R b/code/using_GO/pla/T18_6_6001_7297.R
new file mode 100644
index 0000000000000000000000000000000000000000..32ed66721ec64fc413fed41ce12cce62b8004bb3
--- /dev/null
+++ b/code/using_GO/pla/T18_6_6001_7297.R
@@ -0,0 +1,116 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+# Evaluate `fit`; if it raises an error, report the message and return NULL.
+# (Assignments made inside an error handler only change the handler's local copies,
+# so the outcome has to be passed back as the return value.)
+try_fit <- function(label, fit) {
+  tryCatch(fit, error = function(e) {
+    message(label, " failed: ", conditionMessage(e))
+    NULL
+  })
+}
+########################################################################################
+# Chunk settings: only go_range differs between the sibling chunk scripts of this trait.
+trait <- "T18_6"       # column of pheno_df_pla used as the phenotype
+go_range <- 6001:7297  # positions, after filtering, of the GO terms fitted here
+cycles <- 10
+n_folds <- 8
+data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
+go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
+
+# Results are written into this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+# qgg is installed from GitHub: devtools::install_github("psoerensen/qgg")
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+# The .rds objects that were read but never used are no longer loaded. Every .Rdata
+# file is still loaded because its contents are not visible from this script
+# (G and pheno_df_pla are expected to come from them).
+load(file=file.path(data_dir, "G.Rdata"))
+load(file=file.path(data_dir, "Pheno_pla.Rdata"))
+load(file=file.path(data_dir, "geno_pheno_pla.Rdata"))
+load(file=file.path(data_dir, "pheno_df_pla.Rdata"))
+go_all_markers_number <- readRDS(file=file.path(go_dir, "go_all_markers_number.rds")) #7432
+go_all_markers <- readRDS(file=file.path(go_dir, "go_all_markers.rds")) #7432
+GF <- readRDS(file=file.path(go_dir, "GF.rds")) #7297 excluding those go terms with zero markers
+rGF <- readRDS(file=file.path(go_dir, "rGF.rds")) #7297 excluding those go terms with zero markers
+# Fold assignments saved by the GBLUP run; reused instead of drawing new folds.
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+########################################################################################
+#filter: keep GO terms that have at least one marker, then take this chunk's slice
+go_all_markers_filtered <- go_all_markers[go_all_markers_number>0]
+GF_filtered <- GF[names(go_all_markers_filtered)][go_range]
+rGF_filtered <- rGF[names(go_all_markers_filtered)][go_range]
+
+###########################################################################################
+gblup_variances_all <- rep(list(list()), cycles)
+gblup_prediction_all <- rep(list(list()), cycles)
+gfblup_variances_all <- rep(list(list()), cycles)
+gfblup_prediction_all <- rep(list(list()), cycles)
+gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and design matrix are the same in every cycle, so build them once.
+y <- pheno_df_pla[[trait]]
+# NOTE(review): formula kept as written; an intercept-only model (y ~ 1) was presumably meant - confirm.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # The unused time-seeded fold sampling was dropped; folds come from the saved GBLUP run.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla)/n_folds), byrow=TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message(trait, "...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gblup <- try_fit("GBLUP", list(
+    var = greml(y = y, X = X, GRM = list(G=G), ncores=1),
+    pred = greml(y = y, X = X, GRM = list(G=G), validate = validate, ncores=1)
+  ))
+  if (!is.null(gblup)) {
+    gblup_variances_all[[r]] <- gblup[["var"]]
+    gblup_prediction_all[[r]] <- gblup[["pred"]]
+  }
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # A GO term is stored only when both fits succeed; otherwise both entries stay list().
+  go_variances <- rep(list(list()), length(GF_filtered))
+  go_predictions <- rep(list(list()), length(GF_filtered))
+  names(go_variances) <- names(GF_filtered)
+  names(go_predictions) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    grm <- c(GF_filtered[k], rGF_filtered[k])
+    gfblup <- try_fit(paste("GFBLUP GO#", k), list(
+      var = greml(y = y, X = X, GRM = grm, ncores=1),
+      pred = greml(y = y, X = X, GRM = grm, validate = validate, ncores=1)
+    ))
+    if (!is.null(gfblup)) {
+      go_variances[[k]] <- gfblup[["var"]]
+      go_predictions[[k]] <- gfblup[["pred"]]
+    }
+  }
+  gfblup_variances_all[[r]] <- go_variances
+  gfblup_prediction_all[[r]] <- go_predictions
+  gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+# Output file names are unchanged: <trait>_<result>_all_<first>_<last>.rds
+suffix <- paste0("_", min(go_range), "_", max(go_range), ".rds")
+saveRDS(gblup_variances_all, paste0(trait, "_gblup_variances_all", suffix))
+saveRDS(gblup_prediction_all, paste0(trait, "_gblup_prediction_all", suffix))
+saveRDS(gfblup_variances_all, paste0(trait, "_gfblup_variances_all", suffix))
+saveRDS(gfblup_prediction_all, paste0(trait, "_gfblup_prediction_all", suffix))
+saveRDS(gfblup_validate_all, paste0(trait, "_gfblup_validate_all", suffix))
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T18_7_1001_2000.R b/code/using_GO/pla/T18_7_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..12d6413c2e2ae46fb21ae9b36430fb62825551e8 --- /dev/null +++ b/code/using_GO/pla/T18_7_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- 
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") 
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + 
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_7_gblup_variances_all=rep(list(list()),cycles) +T18_7_gblup_prediction_all=rep(list(list()),cycles) +T18_7_gfblup_variances_all=rep(list(list()),cycles) +T18_7_gfblup_prediction_all=rep(list(list()),cycles) +T18_7_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_7...") + y=pheno_df_pla$T18_7 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_7_gblup_variances_all[[r]]<-var + T18_7_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_7_gblup_variances_all[[r]]<-list() + T18_7_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_7_gfblup_variances_all[[r]]<-var + T18_7_gfblup_prediction_all[[r]]<-pred + T18_7_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_7_gblup_variances_all,"T18_7_gblup_variances_all_1001_2000.rds") +saveRDS(T18_7_gblup_prediction_all,"T18_7_gblup_prediction_all_1001_2000.rds") +saveRDS(T18_7_gfblup_variances_all,"T18_7_gfblup_variances_all_1001_2000.rds") +saveRDS(T18_7_gfblup_prediction_all,"T18_7_gfblup_prediction_all_1001_2000.rds") +saveRDS(T18_7_gfblup_validate_all,"T18_7_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_7_1_1000.R b/code/using_GO/pla/T18_7_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..160483dc9a120ef40a70b3cac03d8ec59dc8e1e1 --- /dev/null +++ b/code/using_GO/pla/T18_7_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") 
+#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero 
markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_7_gblup_variances_all=rep(list(list()),cycles) +T18_7_gblup_prediction_all=rep(list(list()),cycles) +T18_7_gfblup_variances_all=rep(list(list()),cycles) +T18_7_gfblup_prediction_all=rep(list(list()),cycles) +T18_7_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + 
# ---- Trait T18_7: GBLUP and per-GO-term GFBLUP for the current cycle r ----
  message ("T18_7...")
  y <- pheno_df_pla$T18_7
  # NOTE(review): the response also appears on the right-hand side; presumably
  # an intercept-only design (y ~ 1) was intended -- confirm before changing.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)

  # ---- GBLUP ----
  # Both fits are produced inside tryCatch() and stored only when both succeed.
  # The previous error handler assigned to the result lists inside its own
  # function frame, which never reached the script-level objects, and it hid
  # the failure reason. On failure the pre-initialised empty list() is kept.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate,
                   ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T18_7_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T18_7_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP ----
  # One slot per GO term, pre-filled with list() so a failed term stays empty.
  n_terms <- length(GF_filtered)
  gfblup_var <- rep(list(list()), n_terms)
  gfblup_pred <- rep(list(list()), n_terms)
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  # seq_along() instead of 1:length(): an empty GO-term set now means zero
  # iterations rather than iterating over c(1, 0).
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#",k,"..."))
    # Two relationship matrices per model: the k-th entry of GF_filtered
    # together with the k-th entry of rGF_filtered.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate,
                     ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ",
                conditionMessage(e))
        NULL
      }
    )
    # Store the pair atomically: a term whose validation fit failed no longer
    # keeps a variance fit without a matching prediction.
    if (!is.null(fit_k)) {
      gfblup_var[[k]] <- fit_k[["var"]]
      gfblup_pred[[k]] <- fit_k[["pred"]]
    }
  }
  T18_7_gfblup_variances_all[[r]] <- gfblup_var
  T18_7_gfblup_prediction_all[[r]] <- gfblup_pred
  T18_7_gfblup_validate_all[[r]] <- list(validate)
}

# Persist the per-cycle results for this slice of GO terms (working directory).
saveRDS(T18_7_gblup_variances_all, "T18_7_gblup_variances_all_1_1000.rds")
saveRDS(T18_7_gblup_prediction_all, "T18_7_gblup_prediction_all_1_1000.rds")
saveRDS(T18_7_gfblup_variances_all, "T18_7_gfblup_variances_all_1_1000.rds")
saveRDS(T18_7_gfblup_prediction_all, "T18_7_gfblup_prediction_all_1_1000.rds")
saveRDS(T18_7_gfblup_validate_all, "T18_7_gfblup_validate_all_1_1000.rds")
+################################################################################################################################## diff --git a/code/using_GO/pla/T18_7_2001_3000.R b/code/using_GO/pla/T18_7_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..8a7325564813bdb7314bf1230b7028c7edaa92ff --- /dev/null +++ b/code/using_GO/pla/T18_7_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] 
+rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_7_gblup_variances_all=rep(list(list()),cycles) +T18_7_gblup_prediction_all=rep(list(list()),cycles) +T18_7_gfblup_variances_all=rep(list(list()),cycles) +T18_7_gfblup_prediction_all=rep(list(list()),cycles) +T18_7_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_7...") + y=pheno_df_pla$T18_7 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_7_gblup_variances_all[[r]]<-var + T18_7_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_7_gblup_variances_all[[r]]<-list() + T18_7_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred 
<-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_7_gfblup_variances_all[[r]]<-var + T18_7_gfblup_prediction_all[[r]]<-pred + T18_7_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_7_gblup_variances_all,"T18_7_gblup_variances_all_2001_3000.rds") +saveRDS(T18_7_gblup_prediction_all,"T18_7_gblup_prediction_all_2001_3000.rds") +saveRDS(T18_7_gfblup_variances_all,"T18_7_gfblup_variances_all_2001_3000.rds") +saveRDS(T18_7_gfblup_prediction_all,"T18_7_gfblup_prediction_all_2001_3000.rds") +saveRDS(T18_7_gfblup_validate_all,"T18_7_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_7_3001_4000.R b/code/using_GO/pla/T18_7_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..24a77c8bad8c5c2012b711fab61877bb787e89bb --- /dev/null +++ b/code/using_GO/pla/T18_7_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + 
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_7_gblup_variances_all=rep(list(list()),cycles) +T18_7_gblup_prediction_all=rep(list(list()),cycles) +T18_7_gfblup_variances_all=rep(list(list()),cycles) +T18_7_gfblup_prediction_all=rep(list(list()),cycles) +T18_7_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + 
# ---- Body of one cross-validation cycle r (trait T18_7) ----
  message(paste("cycle#",r,"..."))
  # The validation folds come from the saved GBLUP run. The former
  # clock-seeded set.seed()/sample() call and the fold-filling loop (whose body
  # was only a comment) produced values that were never used, so they are gone.
  # NOTE(review): if gblup_validate[[r]] stores one fold per column,
  # byrow = TRUE regroups the indices into different folds -- confirm layout.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)

  message ("T18_7...")
  y <- pheno_df_pla$T18_7
  # NOTE(review): the response also appears on the right-hand side; presumably
  # an intercept-only design (y ~ 1) was intended -- confirm before changing.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)

  # ---- GBLUP ----
  # Both fits are produced inside tryCatch() and stored only when both succeed.
  # The previous error handler assigned to the result lists inside its own
  # function frame, which never reached the script-level objects, and it hid
  # the failure reason. On failure the pre-initialised empty list() is kept.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate,
                   ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T18_7_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T18_7_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP ----
  # One slot per GO term, pre-filled with list() so a failed term stays empty.
  n_terms <- length(GF_filtered)
  gfblup_var <- rep(list(list()), n_terms)
  gfblup_pred <- rep(list(list()), n_terms)
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  # seq_along() instead of 1:length(): an empty GO-term set now means zero
  # iterations rather than iterating over c(1, 0).
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#",k,"..."))
    # Two relationship matrices per model: the k-th entry of GF_filtered
    # together with the k-th entry of rGF_filtered.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate,
                     ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ",
                conditionMessage(e))
        NULL
      }
    )
    # Store the pair atomically: a term whose validation fit failed no longer
    # keeps a variance fit without a matching prediction.
    if (!is.null(fit_k)) {
      gfblup_var[[k]] <- fit_k[["var"]]
      gfblup_pred[[k]] <- fit_k[["pred"]]
    }
  }
  T18_7_gfblup_variances_all[[r]] <- gfblup_var
  T18_7_gfblup_prediction_all[[r]] <- gfblup_pred
  T18_7_gfblup_validate_all[[r]] <- list(validate)
}
+saveRDS(T18_7_gblup_variances_all,"T18_7_gblup_variances_all_3001_4000.rds") +saveRDS(T18_7_gblup_prediction_all,"T18_7_gblup_prediction_all_3001_4000.rds") +saveRDS(T18_7_gfblup_variances_all,"T18_7_gfblup_variances_all_3001_4000.rds") +saveRDS(T18_7_gfblup_prediction_all,"T18_7_gfblup_prediction_all_3001_4000.rds") +saveRDS(T18_7_gfblup_validate_all,"T18_7_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_7_4001_5000.R b/code/using_GO/pla/T18_7_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..eb76ec90bd7a7ac6000f69a79a455455d45d5627 --- /dev/null +++ b/code/using_GO/pla/T18_7_4001_5000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") 
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with 
zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_7_gblup_variances_all=rep(list(list()),cycles) +T18_7_gblup_prediction_all=rep(list(list()),cycles) +T18_7_gfblup_variances_all=rep(list(list()),cycles) +T18_7_gfblup_prediction_all=rep(list(list()),cycles) +T18_7_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_7...") + y=pheno_df_pla$T18_7 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = 
list(G=G), validate = validate,ncores=1) + T18_7_gblup_variances_all[[r]]<-var + T18_7_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_7_gblup_variances_all[[r]]<-list() + T18_7_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_7_gfblup_variances_all[[r]]<-var + T18_7_gfblup_prediction_all[[r]]<-pred + T18_7_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_7_gblup_variances_all,"T18_7_gblup_variances_all_4001_5000.rds") +saveRDS(T18_7_gblup_prediction_all,"T18_7_gblup_prediction_all_4001_5000.rds") +saveRDS(T18_7_gfblup_variances_all,"T18_7_gfblup_variances_all_4001_5000.rds") +saveRDS(T18_7_gfblup_prediction_all,"T18_7_gfblup_prediction_all_4001_5000.rds") +saveRDS(T18_7_gfblup_validate_all,"T18_7_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_7_5001_6000.R b/code/using_GO/pla/T18_7_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..eb898ff1f296d41f1bf34626fce63f072a6e2926 --- /dev/null +++ b/code/using_GO/pla/T18_7_5001_6000.R @@ -0,0 +1,131 @@ 
+############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + 
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_7_gblup_variances_all=rep(list(list()),cycles) +T18_7_gblup_prediction_all=rep(list(list()),cycles) +T18_7_gfblup_variances_all=rep(list(list()),cycles) +T18_7_gfblup_prediction_all=rep(list(list()),cycles) +T18_7_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_7...") + y=pheno_df_pla$T18_7 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_7_gblup_variances_all[[r]]<-var + T18_7_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_7_gblup_variances_all[[r]]<-list() + T18_7_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_7_gfblup_variances_all[[r]]<-var + T18_7_gfblup_prediction_all[[r]]<-pred + T18_7_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_7_gblup_variances_all,"T18_7_gblup_variances_all_5001_6000.rds") +saveRDS(T18_7_gblup_prediction_all,"T18_7_gblup_prediction_all_5001_6000.rds") +saveRDS(T18_7_gfblup_variances_all,"T18_7_gfblup_variances_all_5001_6000.rds") +saveRDS(T18_7_gfblup_prediction_all,"T18_7_gfblup_prediction_all_5001_6000.rds") +saveRDS(T18_7_gfblup_validate_all,"T18_7_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_7_6001_7297.R b/code/using_GO/pla/T18_7_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..8d9de369233dc701474fa4bcd50b85086ca085aa --- /dev/null +++ b/code/using_GO/pla/T18_7_6001_7297.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# 
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
#7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[6001:7297] +rGF_filtered<-rGF_filtered[6001:7297] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_7_gblup_variances_all=rep(list(list()),cycles) +T18_7_gblup_prediction_all=rep(list(list()),cycles) +T18_7_gfblup_variances_all=rep(list(list()),cycles) +T18_7_gfblup_prediction_all=rep(list(list()),cycles) +T18_7_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + 
#validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_7...") + y=pheno_df_pla$T18_7 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_7_gblup_variances_all[[r]]<-var + T18_7_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_7_gblup_variances_all[[r]]<-list() + T18_7_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_7_gfblup_variances_all[[r]]<-var + T18_7_gfblup_prediction_all[[r]]<-pred + T18_7_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_7_gblup_variances_all,"T18_7_gblup_variances_all_6001_7297.rds") +saveRDS(T18_7_gblup_prediction_all,"T18_7_gblup_prediction_all_6001_7297.rds") +saveRDS(T18_7_gfblup_variances_all,"T18_7_gfblup_variances_all_6001_7297.rds") +saveRDS(T18_7_gfblup_prediction_all,"T18_7_gfblup_prediction_all_6001_7297.rds") 
+saveRDS(T18_7_gfblup_validate_all,"T18_7_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_8_1001_2000.R b/code/using_GO/pla/T18_8_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..198b00754dc71963b989cacb6f0e9dd877f2ed59 --- /dev/null +++ b/code/using_GO/pla/T18_8_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers 
+######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_8_gblup_variances_all=rep(list(list()),cycles) +T18_8_gblup_prediction_all=rep(list(list()),cycles) +T18_8_gfblup_variances_all=rep(list(list()),cycles) +T18_8_gfblup_prediction_all=rep(list(list()),cycles) +T18_8_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_8...") + y=pheno_df_pla$T18_8 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_8_gblup_variances_all[[r]]<-var + T18_8_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
T18_8_gblup_variances_all[[r]]<-list() + T18_8_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_8_gfblup_variances_all[[r]]<-var + T18_8_gfblup_prediction_all[[r]]<-pred + T18_8_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_8_gblup_variances_all,"T18_8_gblup_variances_all_1001_2000.rds") +saveRDS(T18_8_gblup_prediction_all,"T18_8_gblup_prediction_all_1001_2000.rds") +saveRDS(T18_8_gfblup_variances_all,"T18_8_gfblup_variances_all_1001_2000.rds") +saveRDS(T18_8_gfblup_prediction_all,"T18_8_gfblup_prediction_all_1001_2000.rds") +saveRDS(T18_8_gfblup_validate_all,"T18_8_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_8_1_1000.R b/code/using_GO/pla/T18_8_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..bce9d22f7f8bd15a9e8d18b57bf67c5f3c8b2d86 --- /dev/null +++ b/code/using_GO/pla/T18_8_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# 
Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_8_gblup_variances_all=rep(list(list()),cycles) 
+T18_8_gblup_prediction_all=rep(list(list()),cycles) +T18_8_gfblup_variances_all=rep(list(list()),cycles) +T18_8_gfblup_prediction_all=rep(list(list()),cycles) +T18_8_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_8...") + y=pheno_df_pla$T18_8 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_8_gblup_variances_all[[r]]<-var + T18_8_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_8_gblup_variances_all[[r]]<-list() + T18_8_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends 
+ } + T18_8_gfblup_variances_all[[r]]<-var + T18_8_gfblup_prediction_all[[r]]<-pred + T18_8_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_8_gblup_variances_all,"T18_8_gblup_variances_all_1_1000.rds") +saveRDS(T18_8_gblup_prediction_all,"T18_8_gblup_prediction_all_1_1000.rds") +saveRDS(T18_8_gfblup_variances_all,"T18_8_gfblup_variances_all_1_1000.rds") +saveRDS(T18_8_gfblup_prediction_all,"T18_8_gfblup_prediction_all_1_1000.rds") +saveRDS(T18_8_gfblup_validate_all,"T18_8_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_8_2001_3000.R b/code/using_GO/pla/T18_8_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..627207793e3585760670d83f6a106dc79a3044a2 --- /dev/null +++ b/code/using_GO/pla/T18_8_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') 
+########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers 
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_8_gblup_variances_all=rep(list(list()),cycles) +T18_8_gblup_prediction_all=rep(list(list()),cycles) +T18_8_gfblup_variances_all=rep(list(list()),cycles) +T18_8_gfblup_prediction_all=rep(list(list()),cycles) +T18_8_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_8...") + y=pheno_df_pla$T18_8 + fm <- y ~ 1*y + 
X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_8_gblup_variances_all[[r]]<-var + T18_8_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_8_gblup_variances_all[[r]]<-list() + T18_8_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_8_gfblup_variances_all[[r]]<-var + T18_8_gfblup_prediction_all[[r]]<-pred + T18_8_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_8_gblup_variances_all,"T18_8_gblup_variances_all_2001_3000.rds") +saveRDS(T18_8_gblup_prediction_all,"T18_8_gblup_prediction_all_2001_3000.rds") +saveRDS(T18_8_gfblup_variances_all,"T18_8_gfblup_variances_all_2001_3000.rds") +saveRDS(T18_8_gfblup_prediction_all,"T18_8_gfblup_prediction_all_2001_3000.rds") +saveRDS(T18_8_gfblup_validate_all,"T18_8_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_8_3001_4000.R 
b/code/using_GO/pla/T18_8_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..5f09c0327bed99049ecd32ab39e125996ddc87d6 --- /dev/null +++ b/code/using_GO/pla/T18_8_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] + 
+########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_8_gblup_variances_all=rep(list(list()),cycles) +T18_8_gblup_prediction_all=rep(list(list()),cycles) +T18_8_gfblup_variances_all=rep(list(list()),cycles) +T18_8_gfblup_prediction_all=rep(list(list()),cycles) +T18_8_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_8...") + y=pheno_df_pla$T18_8 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_8_gblup_variances_all[[r]]<-var + T18_8_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_8_gblup_variances_all[[r]]<-list() + T18_8_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { 
+ message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_8_gfblup_variances_all[[r]]<-var + T18_8_gfblup_prediction_all[[r]]<-pred + T18_8_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_8_gblup_variances_all,"T18_8_gblup_variances_all_3001_4000.rds") +saveRDS(T18_8_gblup_prediction_all,"T18_8_gblup_prediction_all_3001_4000.rds") +saveRDS(T18_8_gfblup_variances_all,"T18_8_gfblup_variances_all_3001_4000.rds") +saveRDS(T18_8_gfblup_prediction_all,"T18_8_gfblup_prediction_all_3001_4000.rds") +saveRDS(T18_8_gfblup_validate_all,"T18_8_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_8_4001_5000.R b/code/using_GO/pla/T18_8_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..9b870c8767efb1291a0aed465fb3ecd7c3283026 --- /dev/null +++ b/code/using_GO/pla/T18_8_4001_5000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if 
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers 
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T18_8_gblup_variances_all=rep(list(list()),cycles) +T18_8_gblup_prediction_all=rep(list(list()),cycles) +T18_8_gfblup_variances_all=rep(list(list()),cycles) +T18_8_gfblup_prediction_all=rep(list(list()),cycles) +T18_8_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = 
nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T18_8...") + y=pheno_df_pla$T18_8 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T18_8_gblup_variances_all[[r]]<-var + T18_8_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T18_8_gblup_variances_all[[r]]<-list() + T18_8_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T18_8_gfblup_variances_all[[r]]<-var + T18_8_gfblup_prediction_all[[r]]<-pred + T18_8_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T18_8_gblup_variances_all,"T18_8_gblup_variances_all_4001_5000.rds") +saveRDS(T18_8_gblup_prediction_all,"T18_8_gblup_prediction_all_4001_5000.rds") 
+saveRDS(T18_8_gfblup_variances_all,"T18_8_gfblup_variances_all_4001_5000.rds") +saveRDS(T18_8_gfblup_prediction_all,"T18_8_gfblup_prediction_all_4001_5000.rds") +saveRDS(T18_8_gfblup_validate_all,"T18_8_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T18_8_5001_6000.R b/code/using_GO/pla/T18_8_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..efe52ac7ee7cffc3a1afbbd71a94d4402dc82d8b --- /dev/null +++ b/code/using_GO/pla/T18_8_5001_6000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") 
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") 
#7297 excluding those go terms with zero markers
########################################################################################
# Keep GO terms that have at least one marker, then take this job's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]

###########################################################################################
cycles <- 10
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle, pre-filled with empty lists so a failed fit stays visible.
T18_8_gblup_variances_all <- rep(list(list()), cycles)
T18_8_gblup_prediction_all <- rep(list(list()), cycles)
T18_8_gfblup_variances_all <- rep(list(list()), cycles)
T18_8_gfblup_prediction_all <- rep(list(list()), cycles)
T18_8_gfblup_validate_all <- rep(list(list()), cycles)

# The response and fixed-effect design do not change between cycles.
y <- pheno_df_pla$T18_8
# NOTE(review): the response also appears on the right-hand side of this
# formula; presumably an intercept-only design was intended -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Validation sets come from the stored GBLUP run; no folds are sampled here.
  # NOTE(review): unlist() flattens a matrix column by column, so the
  # byrow = TRUE refill matches the stored folds only if they were saved
  # row-wise -- confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T18_8...")

  # ---- GBLUP ----
  # Both fits must succeed for either to be stored; otherwise the pre-filled
  # empty lists stay in place. The handler only reports the error, because
  # assignments inside a handler function would change local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T18_8_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T18_8_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one two-component model per GO term ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # A failed fit leaves the empty-list placeholder for this GO term.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, ": ", conditionMessage(e))
    })
  }
  T18_8_gfblup_variances_all[[r]] <- gf_var
  T18_8_gfblup_prediction_all[[r]] <- gf_pred
  T18_8_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T18_8_gblup_variances_all, "T18_8_gblup_variances_all_5001_6000.rds")
saveRDS(T18_8_gblup_prediction_all, "T18_8_gblup_prediction_all_5001_6000.rds")
saveRDS(T18_8_gfblup_variances_all, "T18_8_gfblup_variances_all_5001_6000.rds")
saveRDS(T18_8_gfblup_prediction_all, "T18_8_gfblup_prediction_all_5001_6000.rds")
saveRDS(T18_8_gfblup_validate_all, "T18_8_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################
diff --git a/code/using_GO/pla/T18_8_6001_7297.R b/code/using_GO/pla/T18_8_6001_7297.R
new file mode 100644
index 0000000000000000000000000000000000000000..e91b2ed9a6f09859a1ff3b5fdc6912d0d86b3a5f
--- /dev/null
+++ b/code/using_GO/pla/T18_8_6001_7297.R
@@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading/trailing whitespace from each element of a character vector.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negation of `%in%`, built with purrr::negate().
`%not_in%` <- purrr::negate(`%in%`)

# Report, per element of `mypkg`, whether that package is installed.
# system.file() returns "" for a missing package; it replaces the scan of
# installed.packages(), which R's documentation advises against for this check.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
########################################################################################

# Relative paths used later in the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time setup, kept for reference (not run):
#   install.packages(c("backports", "devtools"))
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under the names they were saved with; `G` and
# `pheno_df_pla` used below presumably come from these files -- confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# GO-term inputs. The counts in the trailing notes are the original author's.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
# Author's note for the objects below: 7297, excluding GO terms with zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# Keep GO terms that have at least one marker, then take this job's slice.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
go_terms <- names(go_all_markers_filtered)
GF_filtered <- GF[go_terms][6001:7297]
rGF_filtered <- rGF[go_terms][6001:7297]

###########################################################################################
# Number of cross-validation cycles.
cycles <- 10
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle, pre-filled with empty lists so a failed fit stays visible.
T18_8_gblup_variances_all <- rep(list(list()), cycles)
T18_8_gblup_prediction_all <- rep(list(list()), cycles)
T18_8_gfblup_variances_all <- rep(list(list()), cycles)
T18_8_gfblup_prediction_all <- rep(list(list()), cycles)
T18_8_gfblup_validate_all <- rep(list(list()), cycles)

# The response and fixed-effect design do not change between cycles.
y <- pheno_df_pla$T18_8
# NOTE(review): the response also appears on the right-hand side of this
# formula; presumably an intercept-only design was intended -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Validation sets come from the stored GBLUP run; no folds are sampled here.
  # NOTE(review): unlist() flattens a matrix column by column, so the
  # byrow = TRUE refill matches the stored folds only if they were saved
  # row-wise -- confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T18_8...")

  # ---- GBLUP ----
  # Both fits must succeed for either to be stored; otherwise the pre-filled
  # empty lists stay in place. The handler only reports the error, because
  # assignments inside a handler function would change local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T18_8_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T18_8_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one two-component model per GO term ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # A failed fit leaves the empty-list placeholder for this GO term.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, ": ", conditionMessage(e))
    })
  }
  T18_8_gfblup_variances_all[[r]] <- gf_var
  T18_8_gfblup_prediction_all[[r]] <- gf_pred
  T18_8_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T18_8_gblup_variances_all, "T18_8_gblup_variances_all_6001_7297.rds")
saveRDS(T18_8_gblup_prediction_all, "T18_8_gblup_prediction_all_6001_7297.rds")
saveRDS(T18_8_gfblup_variances_all, "T18_8_gfblup_variances_all_6001_7297.rds")
saveRDS(T18_8_gfblup_prediction_all, "T18_8_gfblup_prediction_all_6001_7297.rds")
saveRDS(T18_8_gfblup_validate_all, "T18_8_gfblup_validate_all_6001_7297.rds")

##################################################################################################################################
diff --git a/code/using_GO/pla/T19_1_1001_2000.R b/code/using_GO/pla/T19_1_1001_2000.R
new file mode 100644
index 0000000000000000000000000000000000000000..a24d3bd166260892d6f4e367c17f711bb58c6e82
--- /dev/null
+++ b/code/using_GO/pla/T19_1_1001_2000.R
@@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading/trailing whitespace from each element of a character vector.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negation of `%in%`, built with purrr::negate().
`%not_in%` <- purrr::negate(`%in%`)

# Report, per element of `mypkg`, whether that package is installed.
# system.file() returns "" for a missing package; it replaces the scan of
# installed.packages(), which R's documentation advises against for this check.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
########################################################################################

# Relative paths used later in the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time setup, kept for reference (not run):
#   install.packages(c("backports", "devtools"))
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under the names they were saved with; `G` and
# `pheno_df_pla` used below presumably come from these files -- confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
# Keep GO terms that have at least one marker, then take this job's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1001:2000]
rGF_filtered <- rGF_filtered[1001:2000]

###########################################################################################
cycles <- 10
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle, pre-filled with empty lists so a failed fit stays visible.
T19_1_gblup_variances_all <- rep(list(list()), cycles)
T19_1_gblup_prediction_all <- rep(list(list()), cycles)
T19_1_gfblup_variances_all <- rep(list(list()), cycles)
T19_1_gfblup_prediction_all <- rep(list(list()), cycles)
T19_1_gfblup_validate_all <- rep(list(list()), cycles)

# The response and fixed-effect design do not change between cycles.
y <- pheno_df_pla$T19_1
# NOTE(review): the response also appears on the right-hand side of this
# formula; presumably an intercept-only design was intended -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Validation sets come from the stored GBLUP run; no folds are sampled here.
  # NOTE(review): unlist() flattens a matrix column by column, so the
  # byrow = TRUE refill matches the stored folds only if they were saved
  # row-wise -- confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T19_1...")

  # ---- GBLUP ----
  # Both fits must succeed for either to be stored; otherwise the pre-filled
  # empty lists stay in place. The handler only reports the error, because
  # assignments inside a handler function would change local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T19_1_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T19_1_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one two-component model per GO term ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # A failed fit leaves the empty-list placeholder for this GO term.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, ": ", conditionMessage(e))
    })
  }
  T19_1_gfblup_variances_all[[r]] <- gf_var
  T19_1_gfblup_prediction_all[[r]] <- gf_pred
  T19_1_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T19_1_gblup_variances_all, "T19_1_gblup_variances_all_1001_2000.rds")
saveRDS(T19_1_gblup_prediction_all, "T19_1_gblup_prediction_all_1001_2000.rds")
saveRDS(T19_1_gfblup_variances_all, "T19_1_gfblup_variances_all_1001_2000.rds")
saveRDS(T19_1_gfblup_prediction_all, "T19_1_gfblup_prediction_all_1001_2000.rds")
saveRDS(T19_1_gfblup_validate_all, "T19_1_gfblup_validate_all_1001_2000.rds")

##################################################################################################################################
diff --git a/code/using_GO/pla/T19_1_1_1000.R b/code/using_GO/pla/T19_1_1_1000.R
new file mode 100644
index 0000000000000000000000000000000000000000..df74bb6b4a633d233523d30d52912d9eba294bb8
--- /dev/null
+++ b/code/using_GO/pla/T19_1_1_1000.R
@@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading/trailing whitespace from each element of a character vector.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negation of `%in%`, built with purrr::negate().
`%not_in%` <- purrr::negate(`%in%`)

# Report, per element of `mypkg`, whether that package is installed.
# system.file() returns "" for a missing package; it replaces the scan of
# installed.packages(), which R's documentation advises against for this check.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
########################################################################################

# Relative paths used later in the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time setup, kept for reference (not run):
#   install.packages(c("backports", "devtools"))
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
# Genotype / phenotype inputs. load() restores objects under the names they
# were saved with, so those names are not visible here.
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO-term inputs. The counts in the trailing notes are the original author's.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
# Author's note for the objects below: 7297, excluding GO terms with zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# Count the entries of go_all_genes_number whose GO term has at least one marker.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
# Keep GO terms that have at least one marker, then take this job's slice.
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]

###########################################################################################
cycles <- 10
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle, pre-filled with empty lists so a failed fit stays visible.
T19_1_gblup_variances_all <- rep(list(list()), cycles)
T19_1_gblup_prediction_all <- rep(list(list()), cycles)
T19_1_gfblup_variances_all <- rep(list(list()), cycles)
T19_1_gfblup_prediction_all <- rep(list(list()), cycles)
T19_1_gfblup_validate_all <- rep(list(list()), cycles)

# The response and fixed-effect design do not change between cycles.
y <- pheno_df_pla$T19_1
# NOTE(review): the response also appears on the right-hand side of this
# formula; presumably an intercept-only design was intended -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Validation sets come from the stored GBLUP run; no folds are sampled here.
  # NOTE(review): unlist() flattens a matrix column by column, so the
  # byrow = TRUE refill matches the stored folds only if they were saved
  # row-wise -- confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T19_1...")

  # ---- GBLUP ----
  # Both fits must succeed for either to be stored; otherwise the pre-filled
  # empty lists stay in place. The handler only reports the error, because
  # assignments inside a handler function would change local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T19_1_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T19_1_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one two-component model per GO term ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # A failed fit leaves the empty-list placeholder for this GO term.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, ": ", conditionMessage(e))
    })
  }
  T19_1_gfblup_variances_all[[r]] <- gf_var
  T19_1_gfblup_prediction_all[[r]] <- gf_pred
  T19_1_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T19_1_gblup_variances_all, "T19_1_gblup_variances_all_1_1000.rds")
saveRDS(T19_1_gblup_prediction_all, "T19_1_gblup_prediction_all_1_1000.rds")
saveRDS(T19_1_gfblup_variances_all, "T19_1_gfblup_variances_all_1_1000.rds")
saveRDS(T19_1_gfblup_prediction_all, "T19_1_gfblup_prediction_all_1_1000.rds")
saveRDS(T19_1_gfblup_validate_all, "T19_1_gfblup_validate_all_1_1000.rds")

##################################################################################################################################
diff --git a/code/using_GO/pla/T19_1_2001_3000.R b/code/using_GO/pla/T19_1_2001_3000.R
new file mode 100644
index 0000000000000000000000000000000000000000..9993149fcd1f7911c8ed88171c7a7c62145569f0
--- /dev/null
+++ b/code/using_GO/pla/T19_1_2001_3000.R
@@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading/trailing whitespace from each element of a character vector.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negation of `%in%`, built with purrr::negate().
`%not_in%` <- purrr::negate(`%in%`)

# Report, per element of `mypkg`, whether that package is installed.
# system.file() returns "" for a missing package; it replaces the scan of
# installed.packages(), which R's documentation advises against for this check.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
########################################################################################

# Relative paths used later in the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time setup, kept for reference (not run):
#   install.packages(c("backports", "devtools"))
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under the names they were saved with, so those
# names are not visible here.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
# GO-term inputs. The counts in the trailing notes are the original author's.
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
# Author's note for the objects below: 7297, excluding GO terms with zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# Keep GO terms that have at least one marker, then take this job's slice.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
go_terms <- names(go_all_markers_filtered)
GF_filtered <- GF[go_terms][2001:3000]
rGF_filtered <- rGF[go_terms][2001:3000]

###########################################################################################
# Number of cross-validation cycles, the stored GBLUP validation sets, and
# the fold count used to shape them.
cycles <- 10
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One empty-list slot per cycle.
empty_per_cycle <- rep(list(list()), times = cycles)
T19_1_gblup_variances_all <- empty_per_cycle
T19_1_gblup_prediction_all <- empty_per_cycle
T19_1_gfblup_variances_all <- empty_per_cycle
# One slot per cycle, pre-filled with empty lists so a failed fit stays visible.
T19_1_gfblup_prediction_all <- rep(list(list()), cycles)
T19_1_gfblup_validate_all <- rep(list(list()), cycles)

# The response and fixed-effect design do not change between cycles.
y <- pheno_df_pla$T19_1
# NOTE(review): the response also appears on the right-hand side of this
# formula; presumably an intercept-only design was intended -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Validation sets come from the stored GBLUP run; no folds are sampled here.
  # NOTE(review): unlist() flattens a matrix column by column, so the
  # byrow = TRUE refill matches the stored folds only if they were saved
  # row-wise -- confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T19_1...")

  # ---- GBLUP ----
  # Both fits must succeed for either to be stored; otherwise the pre-filled
  # empty lists stay in place. The handler only reports the error, because
  # assignments inside a handler function would change local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T19_1_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T19_1_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one two-component model per GO term ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # A failed fit leaves the empty-list placeholder for this GO term.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, ": ", conditionMessage(e))
    })
  }
  T19_1_gfblup_variances_all[[r]] <- gf_var
  T19_1_gfblup_prediction_all[[r]] <- gf_pred
  T19_1_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T19_1_gblup_variances_all, "T19_1_gblup_variances_all_2001_3000.rds")
saveRDS(T19_1_gblup_prediction_all, "T19_1_gblup_prediction_all_2001_3000.rds")
saveRDS(T19_1_gfblup_variances_all, "T19_1_gfblup_variances_all_2001_3000.rds")
saveRDS(T19_1_gfblup_prediction_all, "T19_1_gfblup_prediction_all_2001_3000.rds")
saveRDS(T19_1_gfblup_validate_all, "T19_1_gfblup_validate_all_2001_3000.rds")

##################################################################################################################################
diff --git a/code/using_GO/pla/T19_1_3001_4000.R b/code/using_GO/pla/T19_1_3001_4000.R
new file mode 100644
index 0000000000000000000000000000000000000000..3acc5e88a6c6d613933e4571feacafe955b1f823
--- /dev/null
+++ b/code/using_GO/pla/T19_1_3001_4000.R
@@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading/trailing whitespace from each element of a character vector.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negation of `%in%`, built with purrr::negate().
`%not_in%` <- purrr::negate(`%in%`)

# Report, per element of `mypkg`, whether that package is installed.
# system.file() returns "" for a missing package; it replaces the scan of
# installed.packages(), which R's documentation advises against for this check.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
########################################################################################

# Relative paths used later in the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time setup, kept for reference (not run):
#   install.packages(c("backports", "devtools"))
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under the names they were saved with; `G` and
# `pheno_df_pla` used below presumably come from these files -- confirm.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
########################################################################################
# Keep GO terms that have at least one marker, then take this job's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]

###########################################################################################
cycles <- 10
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle, pre-filled with empty lists so a failed fit stays visible.
T19_1_gblup_variances_all <- rep(list(list()), cycles)
T19_1_gblup_prediction_all <- rep(list(list()), cycles)
T19_1_gfblup_variances_all <- rep(list(list()), cycles)
T19_1_gfblup_prediction_all <- rep(list(list()), cycles)
T19_1_gfblup_validate_all <- rep(list(list()), cycles)

# The response and fixed-effect design do not change between cycles.
y <- pheno_df_pla$T19_1
# NOTE(review): the response also appears on the right-hand side of this
# formula; presumably an intercept-only design was intended -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Validation sets come from the stored GBLUP run; no folds are sampled here.
  # NOTE(review): unlist() flattens a matrix column by column, so the
  # byrow = TRUE refill matches the stored folds only if they were saved
  # row-wise -- confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T19_1...")

  # ---- GBLUP ----
  # Both fits must succeed for either to be stored; otherwise the pre-filled
  # empty lists stay in place. The handler only reports the error, because
  # assignments inside a handler function would change local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T19_1_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T19_1_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one two-component model per GO term ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    # A failed fit leaves the empty-list placeholder for this GO term.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, ": ", conditionMessage(e))
    })
  }
  T19_1_gfblup_variances_all[[r]] <- gf_var
  T19_1_gfblup_prediction_all[[r]] <- gf_pred
  T19_1_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(T19_1_gblup_variances_all, "T19_1_gblup_variances_all_3001_4000.rds")
saveRDS(T19_1_gblup_prediction_all, "T19_1_gblup_prediction_all_3001_4000.rds")
saveRDS(T19_1_gfblup_variances_all, "T19_1_gfblup_variances_all_3001_4000.rds")
saveRDS(T19_1_gfblup_prediction_all, "T19_1_gfblup_prediction_all_3001_4000.rds")
saveRDS(T19_1_gfblup_validate_all, "T19_1_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
diff --git a/code/using_GO/pla/T19_1_4001_5000.R b/code/using_GO/pla/T19_1_4001_5000.R
new file mode 100644
index 0000000000000000000000000000000000000000..c1f73901b2550978ac8b9cdbfea8074946653cc1
--- /dev/null
+++ b/code/using_GO/pla/T19_1_4001_5000.R
@@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading/trailing whitespace from each element of a character vector.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negation of `%in%`, built with purrr::negate().
`%not_in%` <- purrr::negate(`%in%`)

# Report, per element of `mypkg`, whether that package is installed.
# system.file() returns "" for a missing package; it replaces the scan of
# installed.packages(), which R's documentation advises against for this check.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
########################################################################################

# Relative paths used later in the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time setup, kept for reference (not run):
#   install.packages(c("backports", "devtools"))
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores objects under the names they were saved with, so those
# names are not visible here.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
# ---- Phenotype inputs ---------------------------------------------------
# load() restores whatever objects each .Rdata file contains into the
# current environment; later code relies on `pheno_df_pla` being among them.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO-term inputs -----------------------------------------------------
# Author's size notes: the three go_all_* objects have 7432 entries; the
# remaining objects have 7297 (GO terms with zero markers excluded).
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter -------------------------------------------------------------
# Keep only entries whose marker count is positive, then restrict the
# feature / remainder relationship lists to positions 4001-5000 of that
# filtered ordering (the slice this script is responsible for).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][4001:5000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][4001:5000]
+########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_1_gblup_variances_all=rep(list(list()),cycles) +T19_1_gblup_prediction_all=rep(list(list()),cycles) +T19_1_gfblup_variances_all=rep(list(list()),cycles) +T19_1_gfblup_prediction_all=rep(list(list()),cycles) +T19_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_1...") + y=pheno_df_pla$T19_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_1_gblup_variances_all[[r]]<-var + T19_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_1_gblup_variances_all[[r]]<-list() + T19_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { 
+ message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_1_gfblup_variances_all[[r]]<-var + T19_1_gfblup_prediction_all[[r]]<-pred + T19_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_1_gblup_variances_all,"T19_1_gblup_variances_all_4001_5000.rds") +saveRDS(T19_1_gblup_prediction_all,"T19_1_gblup_prediction_all_4001_5000.rds") +saveRDS(T19_1_gfblup_variances_all,"T19_1_gfblup_variances_all_4001_5000.rds") +saveRDS(T19_1_gfblup_prediction_all,"T19_1_gfblup_prediction_all_4001_5000.rds") +saveRDS(T19_1_gfblup_validate_all,"T19_1_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_1_5001_6000.R b/code/using_GO/pla/T19_1_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..43fe40c6f9cca70c02e91c4ff32e548990ce945f --- /dev/null +++ b/code/using_GO/pla/T19_1_5001_6000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if 
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers 
# ---- Remaining GO-term inputs -------------------------------------------
# Author's size note: 7297 entries each (GO terms with zero markers excluded).
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter -------------------------------------------------------------
# Keep entries with a positive marker count, then take positions 5001-6000
# of the filtered ordering (the slice this script is responsible for).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]

# ---- Cross-validation setup ---------------------------------------------
cycles <- 10
# Validation folds are read from the earlier GBLUP run, one entry per cycle.
# They are NOT re-sampled here (the former set.seed()/sample() code produced
# a `folds_index` that was never used and has been removed).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list() if the fit for it fails.
T19_1_gblup_variances_all <- rep(list(list()), cycles)
T19_1_gblup_prediction_all <- rep(list(list()), cycles)
T19_1_gfblup_variances_all <- rep(list(list()), cycles)
T19_1_gfblup_prediction_all <- rep(list(list()), cycles)
T19_1_gfblup_validate_all <- rep(list(list()), cycles)

# Fit the variance-component model and the cross-validated model for one set
# of relationship matrices. Returns list(var = , pred = ) when both greml()
# calls succeed, or NULL (after reporting the error) when either fails.
# Returning a value instead of assigning inside the handler matters: an
# assignment inside `error = function(e) ...` only touches the handler's own
# frame and never reaches the caller's variables.
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message(label, " failed: ", conditionMessage(e))
      NULL
    }
  )
}

# Response and fixed-effect design matrix do not depend on the cycle.
y <- pheno_df_pla$T19_1
# NOTE(review): `y ~ 1*y` puts the response on the right-hand side; this
# presumably reduces to an intercept-only design -- confirm, and consider
# `y ~ 1`.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): if gblup_validate[[r]] was stored as a (column-major)
  # matrix, unlist() + byrow = TRUE does not rebuild the same fold columns --
  # confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  message("T19_1...")

  # ---- GBLUP: single genomic relationship matrix G ----------------------
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate, "GBLUP")
  if (!is.null(gblup_fit)) {
    T19_1_gblup_variances_all[[r]] <- gblup_fit$var
    T19_1_gblup_prediction_all[[r]] <- gblup_fit$pred
  }

  # ---- GFBLUP: feature matrix + remainder matrix per GO term ------------
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gfblup_fit <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      paste0("GFBLUP GO#", k)
    )
    # Store both results or neither, so var[[k]] and pred[[k]] stay paired.
    if (!is.null(gfblup_fit)) {
      var[[k]] <- gfblup_fit$var
      pred[[k]] <- gfblup_fit$pred
    }
  }
  T19_1_gfblup_variances_all[[r]] <- var
  T19_1_gfblup_prediction_all[[r]] <- pred
  T19_1_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T19_1_gblup_variances_all, "T19_1_gblup_variances_all_5001_6000.rds")
saveRDS(T19_1_gblup_prediction_all, "T19_1_gblup_prediction_all_5001_6000.rds")
+saveRDS(T19_1_gfblup_variances_all,"T19_1_gfblup_variances_all_5001_6000.rds") +saveRDS(T19_1_gfblup_prediction_all,"T19_1_gfblup_prediction_all_5001_6000.rds") +saveRDS(T19_1_gfblup_validate_all,"T19_1_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_1_6001_7297.R b/code/using_GO/pla/T19_1_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..b5653add1576ec7a8910169c6218f82ac5bab913 --- /dev/null +++ b/code/using_GO/pla/T19_1_6001_7297.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") 
# ---- Genotype / annotation inputs ---------------------------------------
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores the objects stored in the .Rdata file into the current
# environment; the GBLUP step later refers to an object named `G`.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")

# ---- Phenotype inputs ---------------------------------------------------
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO-term inputs -----------------------------------------------------
# Author's size notes: the three go_all_* objects have 7432 entries; the
# remaining objects have 7297 (GO terms with zero markers excluded).
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
#7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[6001:7297] +rGF_filtered<-rGF_filtered[6001:7297] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_1_gblup_variances_all=rep(list(list()),cycles) +T19_1_gblup_prediction_all=rep(list(list()),cycles) +T19_1_gfblup_variances_all=rep(list(list()),cycles) +T19_1_gfblup_prediction_all=rep(list(list()),cycles) +T19_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_1...") + y=pheno_df_pla$T19_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_1_gblup_variances_all[[r]]<-var + 
T19_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_1_gblup_variances_all[[r]]<-list() + T19_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_1_gfblup_variances_all[[r]]<-var + T19_1_gfblup_prediction_all[[r]]<-pred + T19_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_1_gblup_variances_all,"T19_1_gblup_variances_all_6001_7297.rds") +saveRDS(T19_1_gblup_prediction_all,"T19_1_gblup_prediction_all_6001_7297.rds") +saveRDS(T19_1_gfblup_variances_all,"T19_1_gfblup_variances_all_6001_7297.rds") +saveRDS(T19_1_gfblup_prediction_all,"T19_1_gfblup_prediction_all_6001_7297.rds") +saveRDS(T19_1_gfblup_validate_all,"T19_1_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_2_1001_2000.R b/code/using_GO/pla/T19_2_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..32966c178b82e9c40c21bb3b4cf8a692a4d6bccb --- /dev/null +++ b/code/using_GO/pla/T19_2_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- 
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") 
# ---- GO-term inputs -----------------------------------------------------
# Author's size notes: the three go_all_* objects have 7432 entries; the
# remaining objects have 7297 (GO terms with zero markers excluded).
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter -------------------------------------------------------------
# Keep only entries whose marker count is positive, then restrict the
# feature / remainder relationship lists to positions 1001-2000 of that
# filtered ordering (the slice this script is responsible for).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][1001:2000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][1001:2000]

# ---- Cross-validation setup ---------------------------------------------
# Number of cycles; the code that follows uses it to size the result lists
# and to bound the main loop.
cycles <- 10
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_2_gblup_variances_all=rep(list(list()),cycles) +T19_2_gblup_prediction_all=rep(list(list()),cycles) +T19_2_gfblup_variances_all=rep(list(list()),cycles) +T19_2_gfblup_prediction_all=rep(list(list()),cycles) +T19_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_2...") + y=pheno_df_pla$T19_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_2_gblup_variances_all[[r]]<-var + T19_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_2_gblup_variances_all[[r]]<-list() + T19_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_2_gfblup_variances_all[[r]]<-var + T19_2_gfblup_prediction_all[[r]]<-pred + T19_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_2_gblup_variances_all,"T19_2_gblup_variances_all_1001_2000.rds") +saveRDS(T19_2_gblup_prediction_all,"T19_2_gblup_prediction_all_1001_2000.rds") +saveRDS(T19_2_gfblup_variances_all,"T19_2_gfblup_variances_all_1001_2000.rds") +saveRDS(T19_2_gfblup_prediction_all,"T19_2_gfblup_prediction_all_1001_2000.rds") +saveRDS(T19_2_gfblup_validate_all,"T19_2_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_2_1_1000.R b/code/using_GO/pla/T19_2_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..6921cb1b1c5801b1319ba87ec98499cfd921da90 --- /dev/null +++ b/code/using_GO/pla/T19_2_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") 
#}
# ---- T19_2_1_1000.R (continued): packages, inputs, cross-validated fits ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
# Input directories; each file.path() call below yields the same absolute path
# that was previously spelled out in full.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

message("Loading data...")
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# load() restores objects under their saved names. `G` and `pheno_df_pla` are
# used below without being assigned in this script, so presumably they come
# from these .Rdata files -- confirm.
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
# The inputs below hold 7297 entries: GO terms with zero markers are excluded.
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
########################################################################################
#filter
# Keep GO terms with at least one marker, then take this script's chunk
# (entries 1..1000 of the filtered lists).
has_markers <- go_all_markers_number > 0
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)][1:1000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][1:1000]

###########################################################################################
# Cross-validation setup. Fold assignments are read from the stored GBLUP
# validation sets (one list element per cycle) rather than re-sampled; the
# time-seeded fold sampling that used to run every cycle was never used and
# has been removed.
cycles <- 10
n_folds <- 8
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")

# Per-cycle result containers, pre-filled with empty lists so that a failed fit
# leaves an empty list in its slot.
T19_2_gblup_variances_all <- rep(list(list()), cycles)
T19_2_gblup_prediction_all <- rep(list(list()), cycles)
T19_2_gfblup_variances_all <- rep(list(list()), cycles)
T19_2_gfblup_prediction_all <- rep(list(list()), cycles)
T19_2_gfblup_validate_all <- rep(list(list()), cycles)

# Response, design matrix and validation-matrix row count do not change
# between cycles, so they are computed once.
y <- pheno_df_pla$T19_2
# NOTE(review): `1 * y` puts the response on the right-hand side; presumably an
# intercept-only design (`y ~ 1`) is intended -- confirm before changing.
fm <- y ~ 1 * y
X <- model.matrix(fm)
fold_rows <- round(nrow(pheno_df_pla) / n_folds)

# Each cycle fits greml() with the single matrix G (stored as *_gblup_*) and
# then once per GO term with the pair GF_filtered[k], rGF_filtered[k] (stored
# as *_gfblup_*). Every model is fitted twice: without and with `validate`.
for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(unlist(gblup_validate[[r]]), fold_rows, byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T19_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Results are stored only when both fits succeed. The previous handler
  # assigned inside function(e), which changed a local copy only; the failure
  # is now reported instead of being swallowed.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G),
                        validate = validate, ncores = 1)
    T19_2_gblup_variances_all[[r]] <- gblup_var
    T19_2_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  }) #try catch ends
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # If the first fit fails the second is not attempted; both slots then keep
    # their empty-list placeholders.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X,
                           GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X,
                            GRM = c(GF_filtered[k], rGF_filtered[k]),
                            validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ",
              conditionMessage(e))
    }) #try catch ends
  }
  T19_2_gfblup_variances_all[[r]] <- gf_var
  T19_2_gfblup_prediction_all[[r]] <- gf_pred
  T19_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
# Relative file names: output is written to the current working directory.
saveRDS(T19_2_gblup_variances_all, "T19_2_gblup_variances_all_1_1000.rds")
saveRDS(T19_2_gblup_prediction_all, "T19_2_gblup_prediction_all_1_1000.rds")
saveRDS(T19_2_gfblup_variances_all, "T19_2_gfblup_variances_all_1_1000.rds")
saveRDS(T19_2_gfblup_prediction_all, "T19_2_gfblup_prediction_all_1_1000.rds")
saveRDS(T19_2_gfblup_validate_all, "T19_2_gfblup_validate_all_1_1000.rds")

##################################################################################################################################

# ==== File: code/using_GO/pla/T19_2_2001_3000.R (new file in this patch) ====
# Cross-validated greml() fits for trait T19_2 over entries 2001..3000 of the
# filtered GO-term lists; results are written as *_2001_3000.rds.
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# The saveRDS() calls at the end use relative names, so output lands here.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
# Input directories; each file.path() call below yields the same absolute path
# that was previously spelled out in full.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

message("Loading data...")
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# load() restores objects under their saved names. `G` and `pheno_df_pla` are
# used below without being assigned in this script, so presumably they come
# from these .Rdata files -- confirm.
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
# The inputs below hold 7297 entries: GO terms with zero markers are excluded.
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
########################################################################################
#filter
# Keep GO terms with at least one marker, then take this script's chunk
# (entries 2001..3000 of the filtered lists).
has_markers <- go_all_markers_number > 0
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)][2001:3000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][2001:3000]

###########################################################################################
# Cross-validation setup. Fold assignments are read from the stored GBLUP
# validation sets (one list element per cycle) rather than re-sampled; the
# time-seeded fold sampling that used to run every cycle was never used and
# has been removed.
cycles <- 10
n_folds <- 8
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")

# Per-cycle result containers, pre-filled with empty lists so that a failed fit
# leaves an empty list in its slot.
T19_2_gblup_variances_all <- rep(list(list()), cycles)
T19_2_gblup_prediction_all <- rep(list(list()), cycles)
T19_2_gfblup_variances_all <- rep(list(list()), cycles)
T19_2_gfblup_prediction_all <- rep(list(list()), cycles)
T19_2_gfblup_validate_all <- rep(list(list()), cycles)

# Response, design matrix and validation-matrix row count do not change
# between cycles, so they are computed once.
y <- pheno_df_pla$T19_2
# NOTE(review): `1 * y` puts the response on the right-hand side; presumably an
# intercept-only design (`y ~ 1`) is intended -- confirm before changing.
fm <- y ~ 1 * y
X <- model.matrix(fm)
fold_rows <- round(nrow(pheno_df_pla) / n_folds)

# Each cycle fits greml() with the single matrix G (stored as *_gblup_*) and
# then once per GO term with the pair GF_filtered[k], rGF_filtered[k] (stored
# as *_gfblup_*). Every model is fitted twice: without and with `validate`.
for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(unlist(gblup_validate[[r]]), fold_rows, byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T19_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Results are stored only when both fits succeed. The previous handler
  # assigned inside function(e), which changed a local copy only; the failure
  # is now reported instead of being swallowed.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G),
                        validate = validate, ncores = 1)
    T19_2_gblup_variances_all[[r]] <- gblup_var
    T19_2_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  }) #try catch ends
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # If the first fit fails the second is not attempted; both slots then keep
    # their empty-list placeholders.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X,
                           GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X,
                            GRM = c(GF_filtered[k], rGF_filtered[k]),
                            validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ",
              conditionMessage(e))
    }) #try catch ends
  }
  T19_2_gfblup_variances_all[[r]] <- gf_var
  T19_2_gfblup_prediction_all[[r]] <- gf_pred
  T19_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T19_2_gblup_variances_all, "T19_2_gblup_variances_all_2001_3000.rds")
saveRDS(T19_2_gblup_prediction_all, "T19_2_gblup_prediction_all_2001_3000.rds")
saveRDS(T19_2_gfblup_variances_all, "T19_2_gfblup_variances_all_2001_3000.rds")
saveRDS(T19_2_gfblup_prediction_all, "T19_2_gfblup_prediction_all_2001_3000.rds")
saveRDS(T19_2_gfblup_validate_all, "T19_2_gfblup_validate_all_2001_3000.rds")

##################################################################################################################################

# ==== File: code/using_GO/pla/T19_2_3001_4000.R (new file in this patch) ====
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# ---- T19_2_3001_4000.R (continued): working directory, inputs, fits ----
# Cross-validated greml() fits for trait T19_2 over entries 3001..4000 of the
# filtered GO-term lists.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
# Input directories; each file.path() call below yields the same absolute path
# that was previously spelled out in full.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

message("Loading data...")
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# load() restores objects under their saved names. `G` and `pheno_df_pla` are
# used below without being assigned in this script, so presumably they come
# from these .Rdata files -- confirm.
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
# The inputs below hold 7297 entries: GO terms with zero markers are excluded.
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
########################################################################################
#filter
# Keep GO terms with at least one marker, then take this script's chunk
# (entries 3001..4000 of the filtered lists).
has_markers <- go_all_markers_number > 0
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)][3001:4000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][3001:4000]

###########################################################################################
# Cross-validation setup. Fold assignments are read from the stored GBLUP
# validation sets (one list element per cycle) rather than re-sampled; the
# time-seeded fold sampling that used to run every cycle was never used and
# has been removed.
cycles <- 10
n_folds <- 8
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")

# Per-cycle result containers, pre-filled with empty lists so that a failed fit
# leaves an empty list in its slot.
T19_2_gblup_variances_all <- rep(list(list()), cycles)
T19_2_gblup_prediction_all <- rep(list(list()), cycles)
T19_2_gfblup_variances_all <- rep(list(list()), cycles)
T19_2_gfblup_prediction_all <- rep(list(list()), cycles)
T19_2_gfblup_validate_all <- rep(list(list()), cycles)

# Response, design matrix and validation-matrix row count do not change
# between cycles, so they are computed once.
y <- pheno_df_pla$T19_2
# NOTE(review): `1 * y` puts the response on the right-hand side; presumably an
# intercept-only design (`y ~ 1`) is intended -- confirm before changing.
fm <- y ~ 1 * y
X <- model.matrix(fm)
fold_rows <- round(nrow(pheno_df_pla) / n_folds)

# Each cycle fits greml() with the single matrix G (stored as *_gblup_*) and
# then once per GO term with the pair GF_filtered[k], rGF_filtered[k] (stored
# as *_gfblup_*). Every model is fitted twice: without and with `validate`.
for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(unlist(gblup_validate[[r]]), fold_rows, byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T19_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Results are stored only when both fits succeed. The previous handler
  # assigned inside function(e), which changed a local copy only; the failure
  # is now reported instead of being swallowed.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G),
                        validate = validate, ncores = 1)
    T19_2_gblup_variances_all[[r]] <- gblup_var
    T19_2_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  }) #try catch ends
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # If the first fit fails the second is not attempted; both slots then keep
    # their empty-list placeholders.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X,
                           GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X,
                            GRM = c(GF_filtered[k], rGF_filtered[k]),
                            validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ",
              conditionMessage(e))
    }) #try catch ends
  }
  T19_2_gfblup_variances_all[[r]] <- gf_var
  T19_2_gfblup_prediction_all[[r]] <- gf_pred
  T19_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
# ---- T19_2_3001_4000.R (tail): write the per-cycle results ----
# Relative file names: output is written to the current working directory.
saveRDS(T19_2_gblup_variances_all, "T19_2_gblup_variances_all_3001_4000.rds")
saveRDS(T19_2_gblup_prediction_all, "T19_2_gblup_prediction_all_3001_4000.rds")
saveRDS(T19_2_gfblup_variances_all, "T19_2_gfblup_variances_all_3001_4000.rds")
saveRDS(T19_2_gfblup_prediction_all, "T19_2_gfblup_prediction_all_3001_4000.rds")
saveRDS(T19_2_gfblup_validate_all, "T19_2_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################

# ==== File: code/using_GO/pla/T19_2_4001_5000.R (new file in this patch) ====
# Cross-validated greml() fits for trait T19_2 over entries 4001..5000 of the
# filtered GO-term lists; results are written as *_4001_5000.rds.
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# The saveRDS() calls at the end use relative names, so output lands here.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
# Input directories; each file.path() call below yields the same absolute path
# that was previously spelled out in full.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

message("Loading data...")
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# load() restores objects under their saved names. `G` and `pheno_df_pla` are
# used below without being assigned in this script, so presumably they come
# from these .Rdata files -- confirm.
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
# The inputs below hold 7297 entries: GO terms with zero markers are excluded.
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
########################################################################################
#filter
# Keep GO terms with at least one marker, then take this script's chunk
# (entries 4001..5000 of the filtered lists).
has_markers <- go_all_markers_number > 0
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)][4001:5000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][4001:5000]

###########################################################################################
# Cross-validation setup. Fold assignments are read from the stored GBLUP
# validation sets (one list element per cycle) rather than re-sampled; the
# time-seeded fold sampling that used to run every cycle was never used and
# has been removed.
cycles <- 10
n_folds <- 8
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")

# Per-cycle result containers, pre-filled with empty lists so that a failed fit
# leaves an empty list in its slot.
T19_2_gblup_variances_all <- rep(list(list()), cycles)
T19_2_gblup_prediction_all <- rep(list(list()), cycles)
T19_2_gfblup_variances_all <- rep(list(list()), cycles)
T19_2_gfblup_prediction_all <- rep(list(list()), cycles)
T19_2_gfblup_validate_all <- rep(list(list()), cycles)

# Response, design matrix and validation-matrix row count do not change
# between cycles, so they are computed once.
y <- pheno_df_pla$T19_2
# NOTE(review): `1 * y` puts the response on the right-hand side; presumably an
# intercept-only design (`y ~ 1`) is intended -- confirm before changing.
fm <- y ~ 1 * y
X <- model.matrix(fm)
fold_rows <- round(nrow(pheno_df_pla) / n_folds)

# Each cycle fits greml() with the single matrix G (stored as *_gblup_*) and
# then once per GO term with the pair GF_filtered[k], rGF_filtered[k] (stored
# as *_gfblup_*). Every model is fitted twice: without and with `validate`.
for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(unlist(gblup_validate[[r]]), fold_rows, byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T19_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Results are stored only when both fits succeed. The previous handler
  # assigned inside function(e), which changed a local copy only; the failure
  # is now reported instead of being swallowed.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G),
                        validate = validate, ncores = 1)
    T19_2_gblup_variances_all[[r]] <- gblup_var
    T19_2_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  }) #try catch ends
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # If the first fit fails the second is not attempted; both slots then keep
    # their empty-list placeholders.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X,
                           GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X,
                            GRM = c(GF_filtered[k], rGF_filtered[k]),
                            validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ",
              conditionMessage(e))
    }) #try catch ends
  }
  T19_2_gfblup_variances_all[[r]] <- gf_var
  T19_2_gfblup_prediction_all[[r]] <- gf_pred
  T19_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T19_2_gblup_variances_all, "T19_2_gblup_variances_all_4001_5000.rds")
saveRDS(T19_2_gblup_prediction_all, "T19_2_gblup_prediction_all_4001_5000.rds")
saveRDS(T19_2_gfblup_variances_all, "T19_2_gfblup_variances_all_4001_5000.rds")
saveRDS(T19_2_gfblup_prediction_all, "T19_2_gfblup_prediction_all_4001_5000.rds")
saveRDS(T19_2_gfblup_validate_all, "T19_2_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################

# ==== File: code/using_GO/pla/T19_2_5001_6000.R (new file in this patch) ====
############################# FUNCTIONS ##############################################
# T19_2_5001_6000.R: cross-validated greml() fits for trait T19_2 over entries
# 5001..6000 of the filtered GO-term lists; results are written as
# *_5001_6000.rds.
# Strip leading and trailing whitespace from each element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# The saveRDS() calls at the end use relative names, so output lands here.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
# Input directories; each file.path() call below yields the same absolute path
# that was previously spelled out in full.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

message("Loading data...")
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# load() restores objects under their saved names. `G` and `pheno_df_pla` are
# used below without being assigned in this script, so presumably they come
# from these .Rdata files -- confirm.
load(file.path(data_dir, "G.Rdata"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
# The inputs below hold 7297 entries: GO terms with zero markers are excluded.
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
########################################################################################
#filter
# Keep GO terms with at least one marker, then take this script's chunk
# (entries 5001..6000 of the filtered lists).
has_markers <- go_all_markers_number > 0
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)][5001:6000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][5001:6000]

###########################################################################################
# Cross-validation setup. Fold assignments are read from the stored GBLUP
# validation sets (one list element per cycle) rather than re-sampled; the
# time-seeded fold sampling that used to run every cycle was never used and
# has been removed.
cycles <- 10
n_folds <- 8
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")

# Per-cycle result containers, pre-filled with empty lists so that a failed fit
# leaves an empty list in its slot.
T19_2_gblup_variances_all <- rep(list(list()), cycles)
T19_2_gblup_prediction_all <- rep(list(list()), cycles)
T19_2_gfblup_variances_all <- rep(list(list()), cycles)
T19_2_gfblup_prediction_all <- rep(list(list()), cycles)
T19_2_gfblup_validate_all <- rep(list(list()), cycles)

# Response, design matrix and validation-matrix row count do not change
# between cycles, so they are computed once.
y <- pheno_df_pla$T19_2
# NOTE(review): `1 * y` puts the response on the right-hand side; presumably an
# intercept-only design (`y ~ 1`) is intended -- confirm before changing.
fm <- y ~ 1 * y
X <- model.matrix(fm)
fold_rows <- round(nrow(pheno_df_pla) / n_folds)

# Each cycle fits greml() with the single matrix G (stored as *_gblup_*) and
# then once per GO term with the pair GF_filtered[k], rGF_filtered[k] (stored
# as *_gfblup_*). Every model is fitted twice: without and with `validate`.
for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(unlist(gblup_validate[[r]]), fold_rows, byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T19_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Results are stored only when both fits succeed. The previous handler
  # assigned inside function(e), which changed a local copy only; the failure
  # is now reported instead of being swallowed.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G),
                        validate = validate, ncores = 1)
    T19_2_gblup_variances_all[[r]] <- gblup_var
    T19_2_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  }) #try catch ends
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # If the first fit fails the second is not attempted; both slots then keep
    # their empty-list placeholders.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X,
                           GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X,
                            GRM = c(GF_filtered[k], rGF_filtered[k]),
                            validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ",
              conditionMessage(e))
    }) #try catch ends
  }
  T19_2_gfblup_variances_all[[r]] <- gf_var
  T19_2_gfblup_prediction_all[[r]] <- gf_pred
  T19_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T19_2_gblup_variances_all, "T19_2_gblup_variances_all_5001_6000.rds")
saveRDS(T19_2_gblup_prediction_all, "T19_2_gblup_prediction_all_5001_6000.rds")
saveRDS(T19_2_gfblup_variances_all, "T19_2_gfblup_variances_all_5001_6000.rds")
saveRDS(T19_2_gfblup_prediction_all, "T19_2_gfblup_prediction_all_5001_6000.rds")
saveRDS(T19_2_gfblup_validate_all, "T19_2_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################

# ==== File: code/using_GO/pla/T19_2_6001_7297.R (new file in this patch) ====
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
#7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers

# ---- Select this job's GO terms ----
# Keep the GO terms that have at least one marker, then take positions
# 6001..7297 of that filtered list from both GRM lists (GF and rGF).
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][6001:7297]
rGF_filtered <- rGF[names(go_all_markers_filtered)][6001:7297]

# ---- Cross-validation setup ----
cycles <- 10
# Validation sets are not drawn here: they are read from the GBLUP run's
# gblup_validate_all.rds and indexed by cycle below.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
T19_2_gblup_variances_all <- rep(list(list()), cycles)
T19_2_gblup_prediction_all <- rep(list(list()), cycles)
T19_2_gfblup_variances_all <- rep(list(list()), cycles)
T19_2_gfblup_prediction_all <- rep(list(list()), cycles)
T19_2_gfblup_validate_all <- rep(list(list()), cycles)

# Run greml() twice for one GRM list: without `validate` (variance components)
# and with `validate` (cross-validated prediction).
#
# Returns list(var = <greml result>, pred = <greml result>). If either call
# errors, the failure is logged with message() and BOTH entries are empty
# lists, so a variance result is never paired with a missing prediction.
# The fallback is returned as a value because an assignment made inside a
# tryCatch() handler only changes a copy local to the handler function.
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message(label, " failed: ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

# Phenotype and design matrix do not change between cycles.
y <- pheno_df_pla$T19_2
# NOTE(review): `y ~ 1*y` puts the response on the right-hand side; presumably
# an intercept-only design (`y ~ 1`) was intended -- confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Reshape this cycle's stored validation indices into a matrix with
  # round(n_records / n_folds) rows, filled row by row.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message("T19_2...")

  # ---- GBLUP: single GRM G ----
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate, "GBLUP")
  T19_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T19_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  # ---- GFBLUP: one fit per GO term, GRM = (GF[k], rGF[k]) ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      paste0("GFBLUP GO#", k)
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  T19_2_gfblup_variances_all[[r]] <- gf_var
  T19_2_gfblup_prediction_all[[r]] <- gf_pred
  T19_2_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T19_2_gblup_variances_all, "T19_2_gblup_variances_all_6001_7297.rds")
saveRDS(T19_2_gblup_prediction_all, "T19_2_gblup_prediction_all_6001_7297.rds")
saveRDS(T19_2_gfblup_variances_all, "T19_2_gfblup_variances_all_6001_7297.rds")
saveRDS(T19_2_gfblup_prediction_all, "T19_2_gfblup_prediction_all_6001_7297.rds")
saveRDS(T19_2_gfblup_validate_all, "T19_2_gfblup_validate_all_6001_7297.rds")

# ---- git patch metadata, kept verbatim from the collapsed diff ----
# diff --git a/code/using_GO/pla/T19_3_1001_2000.R b/code/using_GO/pla/T19_3_1001_2000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..157a4b4e595cb3f08938ecb1afe5cab9f8aa707f
# --- /dev/null
# +++ b/code/using_GO/pla/T19_3_1001_2000.R
# @@ -0,0 +1,131 @@

# ---- Functions ----
# Strip leading and trailing whitespace from x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Input data ----
# readRDS() results are bound to the names on the left; each load() call
# restores whatever objects were stored in the corresponding .Rdata file.
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
# ---- Select this job's GO terms ----
# Keep the GO terms that have at least one marker, then take positions
# 1001..2000 of that filtered list from both GRM lists (GF and rGF).
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][1001:2000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][1001:2000]

# ---- Cross-validation setup ----
cycles <- 10
# Validation sets are not drawn here: they are read from the GBLUP run's
# gblup_validate_all.rds and indexed by cycle below.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
T19_3_gblup_variances_all <- rep(list(list()), cycles)
T19_3_gblup_prediction_all <- rep(list(list()), cycles)
T19_3_gfblup_variances_all <- rep(list(list()), cycles)
T19_3_gfblup_prediction_all <- rep(list(list()), cycles)
T19_3_gfblup_validate_all <- rep(list(list()), cycles)

# Run greml() twice for one GRM list: without `validate` (variance components)
# and with `validate` (cross-validated prediction).
#
# Returns list(var = <greml result>, pred = <greml result>). If either call
# errors, the failure is logged with message() and BOTH entries are empty
# lists, so a variance result is never paired with a missing prediction.
# The fallback is returned as a value because an assignment made inside a
# tryCatch() handler only changes a copy local to the handler function.
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message(label, " failed: ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

# Phenotype and design matrix do not change between cycles.
y <- pheno_df_pla$T19_3
# NOTE(review): `y ~ 1*y` puts the response on the right-hand side; presumably
# an intercept-only design (`y ~ 1`) was intended -- confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Reshape this cycle's stored validation indices into a matrix with
  # round(n_records / n_folds) rows, filled row by row.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message("T19_3...")

  # ---- GBLUP: single GRM G ----
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate, "GBLUP")
  T19_3_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T19_3_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  # ---- GFBLUP: one fit per GO term, GRM = (GF[k], rGF[k]) ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      paste0("GFBLUP GO#", k)
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  T19_3_gfblup_variances_all[[r]] <- gf_var
  T19_3_gfblup_prediction_all[[r]] <- gf_pred
  T19_3_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T19_3_gblup_variances_all, "T19_3_gblup_variances_all_1001_2000.rds")
saveRDS(T19_3_gblup_prediction_all, "T19_3_gblup_prediction_all_1001_2000.rds")
saveRDS(T19_3_gfblup_variances_all, "T19_3_gfblup_variances_all_1001_2000.rds")
saveRDS(T19_3_gfblup_prediction_all, "T19_3_gfblup_prediction_all_1001_2000.rds")
saveRDS(T19_3_gfblup_validate_all, "T19_3_gfblup_validate_all_1001_2000.rds")

# ---- git patch metadata, kept verbatim from the collapsed diff ----
# diff --git a/code/using_GO/pla/T19_3_1_1000.R b/code/using_GO/pla/T19_3_1_1000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..692b82387919766ac5526c330f5925020ea8b02d
# --- /dev/null
# +++ b/code/using_GO/pla/T19_3_1_1000.R
# @@ -0,0 +1,131 @@

# ---- Functions ----
# Strip leading and trailing whitespace from x.
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
#
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Input data ----
# readRDS() results are bound to the names on the left; each load() call
# restores whatever objects were stored in the corresponding .Rdata file.
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers

# ---- Select this job's GO terms ----
# Count and keep the GO terms with at least one marker, then take the first
# 1000 entries of the filtered GF and rGF lists.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][1:1000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][1:1000]

# ---- Cross-validation setup ----
cycles <- 10
# Stored validation sets from the GBLUP run, indexed by cycle later on.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

T19_3_gblup_variances_all <- rep(list(list()), cycles)
# One slot per cycle; a slot stays an empty list when the fit fails.
T19_3_gblup_prediction_all <- rep(list(list()), cycles)
T19_3_gfblup_variances_all <- rep(list(list()), cycles)
T19_3_gfblup_prediction_all <- rep(list(list()), cycles)
T19_3_gfblup_validate_all <- rep(list(list()), cycles)

# Run greml() twice for one GRM list: without `validate` (variance components)
# and with `validate` (cross-validated prediction).
#
# Returns list(var = <greml result>, pred = <greml result>). If either call
# errors, the failure is logged with message() and BOTH entries are empty
# lists, so a variance result is never paired with a missing prediction.
# The fallback is returned as a value because an assignment made inside a
# tryCatch() handler only changes a copy local to the handler function.
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message(label, " failed: ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

# Phenotype and design matrix do not change between cycles.
y <- pheno_df_pla$T19_3
# NOTE(review): `y ~ 1*y` puts the response on the right-hand side; presumably
# an intercept-only design (`y ~ 1`) was intended -- confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Reshape this cycle's stored validation indices into a matrix with
  # round(n_records / n_folds) rows, filled row by row.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message("T19_3...")

  # ---- GBLUP: single GRM G ----
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate, "GBLUP")
  T19_3_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T19_3_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  # ---- GFBLUP: one fit per GO term, GRM = (GF[k], rGF[k]) ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      paste0("GFBLUP GO#", k)
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  T19_3_gfblup_variances_all[[r]] <- gf_var
  T19_3_gfblup_prediction_all[[r]] <- gf_pred
  T19_3_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T19_3_gblup_variances_all, "T19_3_gblup_variances_all_1_1000.rds")
saveRDS(T19_3_gblup_prediction_all, "T19_3_gblup_prediction_all_1_1000.rds")
saveRDS(T19_3_gfblup_variances_all, "T19_3_gfblup_variances_all_1_1000.rds")
saveRDS(T19_3_gfblup_prediction_all, "T19_3_gfblup_prediction_all_1_1000.rds")
saveRDS(T19_3_gfblup_validate_all, "T19_3_gfblup_validate_all_1_1000.rds")

# ---- git patch metadata, kept verbatim from the collapsed diff ----
# diff --git a/code/using_GO/pla/T19_3_2001_3000.R b/code/using_GO/pla/T19_3_2001_3000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..8962a1d60df3a467c5dbcd3d1340227b6404dc14
# --- /dev/null
# +++ b/code/using_GO/pla/T19_3_2001_3000.R
# @@ -0,0 +1,131 @@

# ---- Functions ----
# Strip leading and trailing whitespace from x.
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg)
{
  is.element(mypkg, installed.packages()[,1])
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
# ---- Input data ----
# readRDS() results are bound to the names on the left; each load() call
# restores whatever objects were stored in the corresponding .Rdata file.
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers

# ---- Select this job's GO terms ----
# Keep the GO terms that have at least one marker, then take positions
# 2001..3000 of that filtered list from both GRM lists (GF and rGF).
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][2001:3000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][2001:3000]

# ---- Cross-validation setup ----
cycles <- 10
# Validation sets are not drawn here: they are read from the GBLUP run's
# gblup_validate_all.rds and indexed by cycle below.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
T19_3_gblup_variances_all <- rep(list(list()), cycles)
T19_3_gblup_prediction_all <- rep(list(list()), cycles)
T19_3_gfblup_variances_all <- rep(list(list()), cycles)
T19_3_gfblup_prediction_all <- rep(list(list()), cycles)
T19_3_gfblup_validate_all <- rep(list(list()), cycles)

# Run greml() twice for one GRM list: without `validate` (variance components)
# and with `validate` (cross-validated prediction).
#
# Returns list(var = <greml result>, pred = <greml result>). If either call
# errors, the failure is logged with message() and BOTH entries are empty
# lists, so a variance result is never paired with a missing prediction.
# The fallback is returned as a value because an assignment made inside a
# tryCatch() handler only changes a copy local to the handler function.
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message(label, " failed: ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

# Phenotype and design matrix do not change between cycles.
y <- pheno_df_pla$T19_3
# NOTE(review): `y ~ 1*y` puts the response on the right-hand side; presumably
# an intercept-only design (`y ~ 1`) was intended -- confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Reshape this cycle's stored validation indices into a matrix with
  # round(n_records / n_folds) rows, filled row by row.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message("T19_3...")

  # ---- GBLUP: single GRM G ----
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate, "GBLUP")
  T19_3_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T19_3_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  # ---- GFBLUP: one fit per GO term, GRM = (GF[k], rGF[k]) ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      paste0("GFBLUP GO#", k)
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  T19_3_gfblup_variances_all[[r]] <- gf_var
  T19_3_gfblup_prediction_all[[r]] <- gf_pred
  T19_3_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T19_3_gblup_variances_all, "T19_3_gblup_variances_all_2001_3000.rds")
saveRDS(T19_3_gblup_prediction_all, "T19_3_gblup_prediction_all_2001_3000.rds")
saveRDS(T19_3_gfblup_variances_all, "T19_3_gfblup_variances_all_2001_3000.rds")
saveRDS(T19_3_gfblup_prediction_all, "T19_3_gfblup_prediction_all_2001_3000.rds")
saveRDS(T19_3_gfblup_validate_all, "T19_3_gfblup_validate_all_2001_3000.rds")

# ---- git patch metadata, kept verbatim from the collapsed diff ----
# diff --git a/code/using_GO/pla/T19_3_3001_4000.R
# ---- git patch metadata (continued), kept verbatim from the collapsed diff ----
# b/code/using_GO/pla/T19_3_3001_4000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..e09750c406019d179a97a7bc6e4f054f4a93d497
# --- /dev/null
# +++ b/code/using_GO/pla/T19_3_3001_4000.R
# @@ -0,0 +1,131 @@

# ---- Functions ----
# Strip leading and trailing whitespace from x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Input data ----
# readRDS() results are bound to the names on the left; each load() call
# restores whatever objects were stored in the corresponding .Rdata file.
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers

# ---- Select this job's GO terms ----
# Count and keep the GO terms with at least one marker, then take positions
# 3001..4000 of the filtered GF and rGF lists.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][3001:4000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][3001:4000]

# ---- Cross-validation setup ----
cycles <- 10
# Validation sets are not drawn here: they are read from the GBLUP run's
# gblup_validate_all.rds and indexed by cycle below.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
T19_3_gblup_variances_all <- rep(list(list()), cycles)
T19_3_gblup_prediction_all <- rep(list(list()), cycles)
T19_3_gfblup_variances_all <- rep(list(list()), cycles)
T19_3_gfblup_prediction_all <- rep(list(list()), cycles)
T19_3_gfblup_validate_all <- rep(list(list()), cycles)

# Run greml() twice for one GRM list: without `validate` (variance components)
# and with `validate` (cross-validated prediction).
#
# Returns list(var = <greml result>, pred = <greml result>). If either call
# errors, the failure is logged with message() and BOTH entries are empty
# lists, so a variance result is never paired with a missing prediction.
# The fallback is returned as a value because an assignment made inside a
# tryCatch() handler only changes a copy local to the handler function.
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message(label, " failed: ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

# Phenotype and design matrix do not change between cycles.
y <- pheno_df_pla$T19_3
# NOTE(review): `y ~ 1*y` puts the response on the right-hand side; presumably
# an intercept-only design (`y ~ 1`) was intended -- confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Reshape this cycle's stored validation indices into a matrix with
  # round(n_records / n_folds) rows, filled row by row.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message("T19_3...")

  # ---- GBLUP: single GRM G ----
  gblup_fit <- fit_greml_pair(y, X, list(G = G), validate, "GBLUP")
  T19_3_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T19_3_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  # ---- GFBLUP: one fit per GO term, GRM = (GF[k], rGF[k]) ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      paste0("GFBLUP GO#", k)
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  T19_3_gfblup_variances_all[[r]] <- gf_var
  T19_3_gfblup_prediction_all[[r]] <- gf_pred
  T19_3_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T19_3_gblup_variances_all, "T19_3_gblup_variances_all_3001_4000.rds")
saveRDS(T19_3_gblup_prediction_all, "T19_3_gblup_prediction_all_3001_4000.rds")
saveRDS(T19_3_gfblup_variances_all, "T19_3_gfblup_variances_all_3001_4000.rds")
saveRDS(T19_3_gfblup_prediction_all, "T19_3_gfblup_prediction_all_3001_4000.rds")
saveRDS(T19_3_gfblup_validate_all, "T19_3_gfblup_validate_all_3001_4000.rds")

# ---- git patch metadata, kept verbatim from the collapsed diff ----
# diff --git a/code/using_GO/pla/T19_3_4001_5000.R b/code/using_GO/pla/T19_3_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..8daf7777c507c04b5a8fb5a7455fd0154243e165
# --- /dev/null
# +++ b/code/using_GO/pla/T19_3_4001_5000.R
# @@ -0,0 +1,131 @@

# ---- Functions ----
# Strip leading and trailing whitespace from x.
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg)
{
  is.element(mypkg, installed.packages()[,1])
}

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers 
# ---- Remaining GO prior objects ---------------------------------------------
# The counts in the trailing comments are the original author's notes.
# NOTE(review): setsGF, rsetsGF, nsets and nsnps are not referenced in the
# statements below -- confirm whether they still need to be loaded.
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers

# ---- Filter -----------------------------------------------------------------
# Keep the GO terms with at least one marker, then take the slice of terms
# (positions 4001 to 5000) that this script is responsible for.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]

# ---- Cross-validation setup -------------------------------------------------
cycles <- 10
n_folds <- 8
# Validation sets are read from the GBLUP run instead of being sampled here,
# so no random fold assignment is generated in this script.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")

# One slot per cycle; a slot stays an empty list() when the fit for that
# cycle fails.
T19_3_gblup_variances_all <- rep(list(list()), cycles)
T19_3_gblup_prediction_all <- rep(list(list()), cycles)
T19_3_gfblup_variances_all <- rep(list(list()), cycles)
T19_3_gfblup_prediction_all <- rep(list(list()), cycles)
T19_3_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design matrix do not depend on the cycle, so they
# are built once.
# NOTE(review): the response also appears on the right-hand side (1*y);
# presumably an intercept-only model (y ~ 1) was intended -- confirm.
y <- pheno_df_pla$T19_3
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))

  # NOTE(review): unlist() flattens a matrix column by column, whereas
  # byrow = TRUE refills it row by row. If gblup_validate[[r]] holds a
  # (rows x n_folds) matrix, the columns built here are not the same index
  # sets as the stored ones -- confirm this matches the folds the GBLUP run
  # actually used.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message("T19_3...")

  # ---- GBLUP ----------------------------------------------------------------
  # Both fits are evaluated inside tryCatch() and handed back as a value.
  # Assignments made inside an error-handler function only modify local copies,
  # so the results are stored from the enclosing scope instead.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T19_3_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
    T19_3_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]
  }

  # ---- GFBLUP ---------------------------------------------------------------
  # One entry per GO term, named after the term; entries stay list() on
  # failure.
  # NOTE(review): the variance-only fit receives neither `validate` nor `r`,
  # so as written it is repeated with the same inputs in every cycle.
  gfblup_variances <- rep(list(list()), length(GF_filtered))
  gfblup_prediction <- rep(list(list()), length(GF_filtered))
  names(gfblup_variances) <- names(GF_filtered)
  names(gfblup_prediction) <- names(GF_filtered)

  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the k-th element of GF_filtered
    # and the k-th element of rGF_filtered.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    gfblup_fit <- tryCatch(
      list(
        variances = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        prediction = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message(
          "GFBLUP failed in cycle ", r, " for GO# ", k,
          " (", names(GF_filtered)[k], "): ", conditionMessage(e)
        )
        NULL
      }
    )
    # Store the pair only when both fits succeeded, so a term never ends up
    # with variances but no prediction.
    if (!is.null(gfblup_fit)) {
      gfblup_variances[[k]] <- gfblup_fit[["variances"]]
      gfblup_prediction[[k]] <- gfblup_fit[["prediction"]]
    }
  }

  T19_3_gfblup_variances_all[[r]] <- gfblup_variances
  T19_3_gfblup_prediction_all[[r]] <- gfblup_prediction
  T19_3_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T19_3_gblup_variances_all, "T19_3_gblup_variances_all_4001_5000.rds")
saveRDS(T19_3_gblup_prediction_all, "T19_3_gblup_prediction_all_4001_5000.rds")
+saveRDS(T19_3_gfblup_variances_all,"T19_3_gfblup_variances_all_4001_5000.rds") +saveRDS(T19_3_gfblup_prediction_all,"T19_3_gfblup_prediction_all_4001_5000.rds") +saveRDS(T19_3_gfblup_validate_all,"T19_3_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_3_5001_6000.R b/code/using_GO/pla/T19_3_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..4305ce8022d3fe8359d9a7a68eab2197ed858b20 --- /dev/null +++ b/code/using_GO/pla/T19_3_5001_6000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") 
# Input objects for this run. readRDS() results are bound to the names on the
# left; load() restores whatever objects were saved in each .Rdata file.
# The numbers in trailing comments are the original author's counts.
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO-term priors.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
#7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_3_gblup_variances_all=rep(list(list()),cycles) +T19_3_gblup_prediction_all=rep(list(list()),cycles) +T19_3_gfblup_variances_all=rep(list(list()),cycles) +T19_3_gfblup_prediction_all=rep(list(list()),cycles) +T19_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_3...") + y=pheno_df_pla$T19_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_3_gblup_variances_all[[r]]<-var + 
T19_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_3_gblup_variances_all[[r]]<-list() + T19_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_3_gfblup_variances_all[[r]]<-var + T19_3_gfblup_prediction_all[[r]]<-pred + T19_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_3_gblup_variances_all,"T19_3_gblup_variances_all_5001_6000.rds") +saveRDS(T19_3_gblup_prediction_all,"T19_3_gblup_prediction_all_5001_6000.rds") +saveRDS(T19_3_gfblup_variances_all,"T19_3_gfblup_variances_all_5001_6000.rds") +saveRDS(T19_3_gfblup_prediction_all,"T19_3_gfblup_prediction_all_5001_6000.rds") +saveRDS(T19_3_gfblup_validate_all,"T19_3_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_3_6001_7297.R b/code/using_GO/pla/T19_3_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..ade8f192bec890c67a136193234ca192cc20a318 --- /dev/null +++ b/code/using_GO/pla/T19_3_6001_7297.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- 
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") 
# GO-term priors. The numbers in trailing comments are the original author's
# counts.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers

# ---- Filter -----------------------------------------------------------------
# Count and keep the GO terms with a positive marker count, then select
# positions 6001 to 7297 of the name-matched GF / rGF lists in one step.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][6001:7297]
rGF_filtered <- rGF[names(go_all_markers_filtered)][6001:7297]

# Number of cross-validation cycles.
cycles <- 10
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_3_gblup_variances_all=rep(list(list()),cycles) +T19_3_gblup_prediction_all=rep(list(list()),cycles) +T19_3_gfblup_variances_all=rep(list(list()),cycles) +T19_3_gfblup_prediction_all=rep(list(list()),cycles) +T19_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_3...") + y=pheno_df_pla$T19_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_3_gblup_variances_all[[r]]<-var + T19_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_3_gblup_variances_all[[r]]<-list() + T19_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_3_gfblup_variances_all[[r]]<-var + T19_3_gfblup_prediction_all[[r]]<-pred + T19_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_3_gblup_variances_all,"T19_3_gblup_variances_all_6001_7297.rds") +saveRDS(T19_3_gblup_prediction_all,"T19_3_gblup_prediction_all_6001_7297.rds") +saveRDS(T19_3_gfblup_variances_all,"T19_3_gfblup_variances_all_6001_7297.rds") +saveRDS(T19_3_gfblup_prediction_all,"T19_3_gfblup_prediction_all_6001_7297.rds") +saveRDS(T19_3_gfblup_validate_all,"T19_3_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_4_1001_2000.R b/code/using_GO/pla/T19_4_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..afebbb4585d0de8144367ab0280502df523233c1 --- /dev/null +++ b/code/using_GO/pla/T19_4_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# 
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
#7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_4_gblup_variances_all=rep(list(list()),cycles) +T19_4_gblup_prediction_all=rep(list(list()),cycles) +T19_4_gfblup_variances_all=rep(list(list()),cycles) +T19_4_gfblup_prediction_all=rep(list(list()),cycles) +T19_4_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + 
#validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_4...") + y=pheno_df_pla$T19_4 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_4_gblup_variances_all[[r]]<-var + T19_4_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_4_gblup_variances_all[[r]]<-list() + T19_4_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_4_gfblup_variances_all[[r]]<-var + T19_4_gfblup_prediction_all[[r]]<-pred + T19_4_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_4_gblup_variances_all,"T19_4_gblup_variances_all_1001_2000.rds") +saveRDS(T19_4_gblup_prediction_all,"T19_4_gblup_prediction_all_1001_2000.rds") +saveRDS(T19_4_gfblup_variances_all,"T19_4_gfblup_variances_all_1001_2000.rds") +saveRDS(T19_4_gfblup_prediction_all,"T19_4_gfblup_prediction_all_1001_2000.rds") 
+saveRDS(T19_4_gfblup_validate_all,"T19_4_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_4_1_1000.R b/code/using_GO/pla/T19_4_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..2e215a577fd34c14209bd7c713918b0e6e90f5ee --- /dev/null +++ b/code/using_GO/pla/T19_4_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") 
# Input objects for this run. readRDS() results are bound to the names on the
# left; load() restores whatever objects were saved in each .Rdata file.
# The numbers in trailing comments are the original author's counts.
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO-term priors.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers

# ---- Filter -----------------------------------------------------------------
# Number of entries of go_all_genes_number whose marker count is positive.
n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_4_gblup_variances_all=rep(list(list()),cycles) +T19_4_gblup_prediction_all=rep(list(list()),cycles) +T19_4_gfblup_variances_all=rep(list(list()),cycles) +T19_4_gfblup_prediction_all=rep(list(list()),cycles) +T19_4_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_4...") + y=pheno_df_pla$T19_4 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_4_gblup_variances_all[[r]]<-var + T19_4_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_4_gblup_variances_all[[r]]<-list() + T19_4_gblup_prediction_all[[r]]<-list() + }) #try catch ends + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_4_gfblup_variances_all[[r]]<-var + T19_4_gfblup_prediction_all[[r]]<-pred + T19_4_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_4_gblup_variances_all,"T19_4_gblup_variances_all_1_1000.rds") +saveRDS(T19_4_gblup_prediction_all,"T19_4_gblup_prediction_all_1_1000.rds") +saveRDS(T19_4_gfblup_variances_all,"T19_4_gfblup_variances_all_1_1000.rds") +saveRDS(T19_4_gfblup_prediction_all,"T19_4_gfblup_prediction_all_1_1000.rds") +saveRDS(T19_4_gfblup_validate_all,"T19_4_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_4_2001_3000.R b/code/using_GO/pla/T19_4_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..729ace8d7ca07567d6c6295f7eaa60a191b37c7a --- /dev/null +++ b/code/using_GO/pla/T19_4_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, 
installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_4_gblup_variances_all=rep(list(list()),cycles) +T19_4_gblup_prediction_all=rep(list(list()),cycles) +T19_4_gfblup_variances_all=rep(list(list()),cycles) 
+T19_4_gfblup_prediction_all=rep(list(list()),cycles) +T19_4_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_4...") + y=pheno_df_pla$T19_4 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_4_gblup_variances_all[[r]]<-var + T19_4_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_4_gblup_variances_all[[r]]<-list() + T19_4_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_4_gfblup_variances_all[[r]]<-var + T19_4_gfblup_prediction_all[[r]]<-pred + 
T19_4_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_4_gblup_variances_all,"T19_4_gblup_variances_all_2001_3000.rds") +saveRDS(T19_4_gblup_prediction_all,"T19_4_gblup_prediction_all_2001_3000.rds") +saveRDS(T19_4_gfblup_variances_all,"T19_4_gfblup_variances_all_2001_3000.rds") +saveRDS(T19_4_gfblup_prediction_all,"T19_4_gfblup_prediction_all_2001_3000.rds") +saveRDS(T19_4_gfblup_validate_all,"T19_4_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_4_3001_4000.R b/code/using_GO/pla/T19_4_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..6bf9e07e018aef058532b898006016d61388f473 --- /dev/null +++ b/code/using_GO/pla/T19_4_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") 
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero 
markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_4_gblup_variances_all=rep(list(list()),cycles) +T19_4_gblup_prediction_all=rep(list(list()),cycles) +T19_4_gfblup_variances_all=rep(list(list()),cycles) +T19_4_gfblup_prediction_all=rep(list(list()),cycles) +T19_4_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_4...") + y=pheno_df_pla$T19_4 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_4_gblup_variances_all[[r]]<-var + T19_4_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_4_gblup_variances_all[[r]]<-list() + T19_4_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_4_gfblup_variances_all[[r]]<-var + T19_4_gfblup_prediction_all[[r]]<-pred + T19_4_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_4_gblup_variances_all,"T19_4_gblup_variances_all_3001_4000.rds") +saveRDS(T19_4_gblup_prediction_all,"T19_4_gblup_prediction_all_3001_4000.rds") +saveRDS(T19_4_gfblup_variances_all,"T19_4_gfblup_variances_all_3001_4000.rds") +saveRDS(T19_4_gfblup_prediction_all,"T19_4_gfblup_prediction_all_3001_4000.rds") +saveRDS(T19_4_gfblup_validate_all,"T19_4_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_4_4001_5000.R b/code/using_GO/pla/T19_4_4001_5000.R new 
file mode 100644 index 0000000000000000000000000000000000000000..5686a5e76cf01e857ebdb3598eaf20dd1b727fa0 --- /dev/null +++ b/code/using_GO/pla/T19_4_4001_5000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] + 
+########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_4_gblup_variances_all=rep(list(list()),cycles) +T19_4_gblup_prediction_all=rep(list(list()),cycles) +T19_4_gfblup_variances_all=rep(list(list()),cycles) +T19_4_gfblup_prediction_all=rep(list(list()),cycles) +T19_4_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_4...") + y=pheno_df_pla$T19_4 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_4_gblup_variances_all[[r]]<-var + T19_4_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_4_gblup_variances_all[[r]]<-list() + T19_4_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { 
+ message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_4_gfblup_variances_all[[r]]<-var + T19_4_gfblup_prediction_all[[r]]<-pred + T19_4_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_4_gblup_variances_all,"T19_4_gblup_variances_all_4001_5000.rds") +saveRDS(T19_4_gblup_prediction_all,"T19_4_gblup_prediction_all_4001_5000.rds") +saveRDS(T19_4_gfblup_variances_all,"T19_4_gfblup_variances_all_4001_5000.rds") +saveRDS(T19_4_gfblup_prediction_all,"T19_4_gfblup_prediction_all_4001_5000.rds") +saveRDS(T19_4_gfblup_validate_all,"T19_4_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_4_5001_6000.R b/code/using_GO/pla/T19_4_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..5917ed4ee6dcbd64b2fa7d99366e8ce768e49d66 --- /dev/null +++ b/code/using_GO/pla/T19_4_5001_6000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if 
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers 
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_4_gblup_variances_all=rep(list(list()),cycles) +T19_4_gblup_prediction_all=rep(list(list()),cycles) +T19_4_gfblup_variances_all=rep(list(list()),cycles) +T19_4_gfblup_prediction_all=rep(list(list()),cycles) +T19_4_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = 
nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_4...") + y=pheno_df_pla$T19_4 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_4_gblup_variances_all[[r]]<-var + T19_4_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_4_gblup_variances_all[[r]]<-list() + T19_4_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_4_gfblup_variances_all[[r]]<-var + T19_4_gfblup_prediction_all[[r]]<-pred + T19_4_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_4_gblup_variances_all,"T19_4_gblup_variances_all_5001_6000.rds") +saveRDS(T19_4_gblup_prediction_all,"T19_4_gblup_prediction_all_5001_6000.rds") 
+saveRDS(T19_4_gfblup_variances_all,"T19_4_gfblup_variances_all_5001_6000.rds") +saveRDS(T19_4_gfblup_prediction_all,"T19_4_gfblup_prediction_all_5001_6000.rds") +saveRDS(T19_4_gfblup_validate_all,"T19_4_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_4_6001_7297.R b/code/using_GO/pla/T19_4_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..815713095d8ed4997a4c3b337da1aa16e769bd49 --- /dev/null +++ b/code/using_GO/pla/T19_4_6001_7297.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") 
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") 
#7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[6001:7297] +rGF_filtered<-rGF_filtered[6001:7297] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_4_gblup_variances_all=rep(list(list()),cycles) +T19_4_gblup_prediction_all=rep(list(list()),cycles) +T19_4_gfblup_variances_all=rep(list(list()),cycles) +T19_4_gfblup_prediction_all=rep(list(list()),cycles) +T19_4_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_4...") + y=pheno_df_pla$T19_4 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_4_gblup_variances_all[[r]]<-var + 
T19_4_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_4_gblup_variances_all[[r]]<-list() + T19_4_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_4_gfblup_variances_all[[r]]<-var + T19_4_gfblup_prediction_all[[r]]<-pred + T19_4_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_4_gblup_variances_all,"T19_4_gblup_variances_all_6001_7297.rds") +saveRDS(T19_4_gblup_prediction_all,"T19_4_gblup_prediction_all_6001_7297.rds") +saveRDS(T19_4_gfblup_variances_all,"T19_4_gfblup_variances_all_6001_7297.rds") +saveRDS(T19_4_gfblup_prediction_all,"T19_4_gfblup_prediction_all_6001_7297.rds") +saveRDS(T19_4_gfblup_validate_all,"T19_4_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_5_1001_2000.R b/code/using_GO/pla/T19_5_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..d45a19959af561dc3872c9d624749328c5151ca7 --- /dev/null +++ b/code/using_GO/pla/T19_5_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- 
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") 
# ---- GO prior objects -------------------------------------------------------
# Bookkeeping over all GO terms (7432)
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# Per-term objects: 7297 entries, excluding those GO terms with zero markers
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter -----------------------------------------------------------------
# Keep only GO terms that have at least one marker, then take this job's
# slice (terms 1001 to 2000) of the GO and remainder relationship lists.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)][1001:2000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][1001:2000]

# ---- Cross-validation settings ----------------------------------------------
# Number of repeated cross-validation cycles.
# (disabled) gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
cycles <- 10
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_5_gblup_variances_all=rep(list(list()),cycles) +T19_5_gblup_prediction_all=rep(list(list()),cycles) +T19_5_gfblup_variances_all=rep(list(list()),cycles) +T19_5_gfblup_prediction_all=rep(list(list()),cycles) +T19_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_5...") + y=pheno_df_pla$T19_5 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_5_gblup_variances_all[[r]]<-var + T19_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_5_gblup_variances_all[[r]]<-list() + T19_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_5_gfblup_variances_all[[r]]<-var + T19_5_gfblup_prediction_all[[r]]<-pred + T19_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_5_gblup_variances_all,"T19_5_gblup_variances_all_1001_2000.rds") +saveRDS(T19_5_gblup_prediction_all,"T19_5_gblup_prediction_all_1001_2000.rds") +saveRDS(T19_5_gfblup_variances_all,"T19_5_gfblup_variances_all_1001_2000.rds") +saveRDS(T19_5_gfblup_prediction_all,"T19_5_gfblup_prediction_all_1001_2000.rds") +saveRDS(T19_5_gfblup_validate_all,"T19_5_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_5_1_1000.R b/code/using_GO/pla/T19_5_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..f5ff7ca4174bf7eadf94c8fe06e394d1f717516f --- /dev/null +++ b/code/using_GO/pla/T19_5_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") 
#}
# =============================================================================
# GBLUP / GFBLUP cross-validation for trait T19_5, GO sets 1 to 1000.
#
# For each of `cycles` stored fold assignments this script fits
#   * GBLUP  : greml() with the single genomic relationship matrix G
#   * GFBLUP : greml() with c(GF_filtered[k], rGF_filtered[k]) for every GO set
# once without and once with `validate`, and saves the collected fits as .rds
# files in the current working directory.
# =============================================================================

# ---- Packages ----------------------------------------------------------------
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Input data --------------------------------------------------------------
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

# NOTE(review): several objects loaded here (e.g. MAC_matrix_with_header,
# accessions, W, markerSets, nsets, nsnps) are not referenced again in this
# script; they are kept so the session state matches the original.
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
load(file.path(data_dir, "G.Rdata"))            # presumably provides G -- confirm
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata")) # presumably provides pheno_df_pla -- confirm

# GO bookkeeping over all terms (7432)
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
# Per-term objects: 7297 entries, excluding those GO terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))

# ---- Filter ------------------------------------------------------------------
# Keep GO terms with at least one marker, then take this job's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]

# ---- Cross-validation settings -----------------------------------------------
cycles <- 10
# Fold assignments stored by the plain GBLUP run; reused here instead of
# drawing new random folds.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
T19_5_gblup_variances_all <- rep(list(list()), cycles)
T19_5_gblup_prediction_all <- rep(list(list()), cycles)
T19_5_gfblup_variances_all <- rep(list(list()), cycles)
T19_5_gfblup_prediction_all <- rep(list(list()), cycles)
T19_5_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design do not change between cycles, so they are
# built once rather than inside the loop.
message("T19_5...")
y <- pheno_df_pla$T19_5
# NOTE(review): unusual formula (response repeated on the right-hand side);
# presumably an intercept-only model was intended -- confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

# Fit the variance-component model and its cross-validated counterpart for one
# set of relationship matrices. Returns list(var =, pred =); if either greml()
# call errors, the failure is reported and BOTH entries are empty lists, which
# is what the original error handlers tried (but, assigning only inside the
# handler's own environment, failed) to do.
fit_pair <- function(grm, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = grm, ncores = 1),
      pred = greml(y = y, X = X, GRM = grm, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message(label, " failed: ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))

  # Rebuild the fold matrix (rows = individuals per fold) for this cycle.
  # The original also seeded the RNG from the clock and drew a fresh
  # `folds_index` that was never used; that dead code has been removed.
  # NOTE(review): byrow = TRUE reproduces the stored folds only if they were
  # flattened row-wise, and round() assumes nrow(pheno_df_pla) is a multiple
  # of n_folds -- confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  # ---- GBLUP ----
  gblup_fit <- fit_pair(list(G = G), validate, paste0("GBLUP cycle ", r))
  # `[<-` with list(...) stores the value even if it were NULL.
  T19_5_gblup_variances_all[r] <- list(gblup_fit$var)
  T19_5_gblup_prediction_all[r] <- list(gblup_fit$pred)

  # ---- GFBLUP ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_pair(
      c(GF_filtered[k], rGF_filtered[k]),
      validate,
      paste0("GFBLUP cycle ", r, ", GO set ", k)
    )
    var[k] <- list(gf_fit$var)
    pred[k] <- list(gf_fit$pred)
  }
  T19_5_gfblup_variances_all[[r]] <- var
  T19_5_gfblup_prediction_all[[r]] <- pred
  T19_5_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Save results (relative to the current working directory) -----------------
saveRDS(T19_5_gblup_variances_all, "T19_5_gblup_variances_all_1_1000.rds")
saveRDS(T19_5_gblup_prediction_all, "T19_5_gblup_prediction_all_1_1000.rds")
saveRDS(T19_5_gfblup_variances_all, "T19_5_gfblup_variances_all_1_1000.rds")
saveRDS(T19_5_gfblup_prediction_all, "T19_5_gfblup_prediction_all_1_1000.rds")
saveRDS(T19_5_gfblup_validate_all, "T19_5_gfblup_validate_all_1_1000.rds")

+################################################################################################################################## diff --git a/code/using_GO/pla/T19_5_2001_3000.R b/code/using_GO/pla/T19_5_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..357aafd29d11c4663a4ae0ec844adb0f8d124b19 --- /dev/null +++ b/code/using_GO/pla/T19_5_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") 
# ---- Input objects (continued) ----------------------------------------------
# load() restores whatever objects each .Rdata file holds (presumably G and
# pheno_df_pla, which the model code later uses -- confirm); readRDS() results
# are bound to the names on the left.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO bookkeeping over all terms (7432)
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# Per-term objects: 7297 entries, excluding those GO terms with zero markers
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter -----------------------------------------------------------------
# Restrict to GO terms that have at least one marker.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_5_gblup_variances_all=rep(list(list()),cycles) +T19_5_gblup_prediction_all=rep(list(list()),cycles) +T19_5_gfblup_variances_all=rep(list(list()),cycles) +T19_5_gfblup_prediction_all=rep(list(list()),cycles) +T19_5_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_5...") + y=pheno_df_pla$T19_5 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_5_gblup_variances_all[[r]]<-var + T19_5_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_5_gblup_variances_all[[r]]<-list() + T19_5_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred 
<-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_5_gfblup_variances_all[[r]]<-var + T19_5_gfblup_prediction_all[[r]]<-pred + T19_5_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_5_gblup_variances_all,"T19_5_gblup_variances_all_2001_3000.rds") +saveRDS(T19_5_gblup_prediction_all,"T19_5_gblup_prediction_all_2001_3000.rds") +saveRDS(T19_5_gfblup_variances_all,"T19_5_gfblup_variances_all_2001_3000.rds") +saveRDS(T19_5_gfblup_prediction_all,"T19_5_gfblup_prediction_all_2001_3000.rds") +saveRDS(T19_5_gfblup_validate_all,"T19_5_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_5_3001_4000.R b/code/using_GO/pla/T19_5_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..8d1ab5c5547e07d0b0dc1e231312fc62b33afc01 --- /dev/null +++ b/code/using_GO/pla/T19_5_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + 
# =============================================================================
# GBLUP / GFBLUP cross-validation for trait T19_5, GO sets 3001 to 4000.
#
# For each of `cycles` stored fold assignments this script fits
#   * GBLUP  : greml() with the single genomic relationship matrix G
#   * GFBLUP : greml() with c(GF_filtered[k], rGF_filtered[k]) for every GO set
# once without and once with `validate`, collecting the fits in the
# T19_5_* result lists.
# =============================================================================

# Output files written later use bare file names, so they land in this folder.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}

# ---- Packages ----------------------------------------------------------------
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Input data --------------------------------------------------------------
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

# NOTE(review): several objects loaded here (e.g. MAC_matrix_with_header,
# accessions, W, markerSets, nsets, nsnps) are not referenced by the model
# code below; they are kept so the session state matches the original.
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
load(file.path(data_dir, "G.Rdata"))            # presumably provides G -- confirm
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
load(file.path(data_dir, "Pheno_pla.Rdata"))
load(file.path(data_dir, "geno_pheno_pla.Rdata"))
load(file.path(data_dir, "pheno_df_pla.Rdata")) # presumably provides pheno_df_pla -- confirm

# GO bookkeeping over all terms (7432)
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
# Per-term objects: 7297 entries, excluding those GO terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))

# ---- Filter ------------------------------------------------------------------
# Keep GO terms with at least one marker, then take this job's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]

# ---- Cross-validation settings -----------------------------------------------
cycles <- 10
# Fold assignments stored by the plain GBLUP run; reused here instead of
# drawing new random folds.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
T19_5_gblup_variances_all <- rep(list(list()), cycles)
T19_5_gblup_prediction_all <- rep(list(list()), cycles)
T19_5_gfblup_variances_all <- rep(list(list()), cycles)
T19_5_gfblup_prediction_all <- rep(list(list()), cycles)
T19_5_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design do not change between cycles, so they are
# built once rather than inside the loop.
message("T19_5...")
y <- pheno_df_pla$T19_5
# NOTE(review): unusual formula (response repeated on the right-hand side);
# presumably an intercept-only model was intended -- confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

# Fit the variance-component model and its cross-validated counterpart for one
# set of relationship matrices. Returns list(var =, pred =); if either greml()
# call errors, the failure is reported and BOTH entries are empty lists, which
# is what the original error handlers tried (but, assigning only inside the
# handler's own environment, failed) to do.
fit_pair <- function(grm, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = grm, ncores = 1),
      pred = greml(y = y, X = X, GRM = grm, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message(label, " failed: ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))

  # Rebuild the fold matrix (rows = individuals per fold) for this cycle.
  # The original also seeded the RNG from the clock and drew a fresh
  # `folds_index` that was never used; that dead code has been removed.
  # NOTE(review): byrow = TRUE reproduces the stored folds only if they were
  # flattened row-wise, and round() assumes nrow(pheno_df_pla) is a multiple
  # of n_folds -- confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  # ---- GBLUP ----
  gblup_fit <- fit_pair(list(G = G), validate, paste0("GBLUP cycle ", r))
  # `[<-` with list(...) stores the value even if it were NULL.
  T19_5_gblup_variances_all[r] <- list(gblup_fit$var)
  T19_5_gblup_prediction_all[r] <- list(gblup_fit$pred)

  # ---- GFBLUP ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    gf_fit <- fit_pair(
      c(GF_filtered[k], rGF_filtered[k]),
      validate,
      paste0("GFBLUP cycle ", r, ", GO set ", k)
    )
    var[k] <- list(gf_fit$var)
    pred[k] <- list(gf_fit$pred)
  }
  T19_5_gfblup_variances_all[[r]] <- var
  T19_5_gfblup_prediction_all[[r]] <- pred
  T19_5_gfblup_validate_all[[r]] <- list(validate)
}
+saveRDS(T19_5_gblup_variances_all,"T19_5_gblup_variances_all_3001_4000.rds") +saveRDS(T19_5_gblup_prediction_all,"T19_5_gblup_prediction_all_3001_4000.rds") +saveRDS(T19_5_gfblup_variances_all,"T19_5_gfblup_variances_all_3001_4000.rds") +saveRDS(T19_5_gfblup_prediction_all,"T19_5_gfblup_prediction_all_3001_4000.rds") +saveRDS(T19_5_gfblup_validate_all,"T19_5_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_5_4001_5000.R b/code/using_GO/pla/T19_5_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..af4cb87b7baa904f54a8065411af1864afffae65 --- /dev/null +++ b/code/using_GO/pla/T19_5_4001_5000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") 
+accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
+go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
+setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
+rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
+nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
+nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
+GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
+rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
+########################################################################################
+# filter: keep GO terms with at least one marker, then take this script's slice
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+GF_filtered <- GF_filtered[4001:5000]
+rGF_filtered <- rGF_filtered[4001:5000]
+
+###########################################################################################
+cycles <- 10  # one stored GBLUP fold set is read per cycle
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T19_5_gblup_variances_all <- rep(list(list()), cycles)
+T19_5_gblup_prediction_all <- rep(list(list()), cycles)
+T19_5_gfblup_variances_all <- rep(list(list()), cycles)
+T19_5_gfblup_prediction_all <- rep(list(list()), cycles)
+T19_5_gfblup_validate_all <- rep(list(list()), cycles)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Validation folds are rebuilt from the stored GBLUP fold set for cycle r,
+  # so no random fold assignment is drawn in this script.
+  validate <- matrix(unlist(gblup_validate[[r]]),
+                     round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message("T19_5...")
+  y <- pheno_df_pla$T19_5
+  # NOTE(review): the response also appears on the right-hand side here;
+  # an intercept-only design (y ~ 1) may have been intended -- confirm.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Store a cycle only when both fits succeed; the handler returns the empty
+  # placeholders as a value because assignments made inside it would be local.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+         pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+  T19_5_gblup_variances_all[[r]] <- gblup_fit$var
+  T19_5_gblup_prediction_all[[r]] <- gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One slot per GO term in this slice, pre-filled with empty lists.
+  var <- rep(list(list()), length(GF_filtered))
+  pred <- rep(list(list()), length(GF_filtered))
+  names(var) <- names(GF_filtered)
+  names(pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    # Fit inside the tryCatch expression, which runs in this scope, so a failure
+    # leaves the unfilled slot(s) as empty lists; the handler only logs it.
+    tryCatch({
+      var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
+      pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  T19_5_gfblup_variances_all[[r]] <- var
+  T19_5_gfblup_prediction_all[[r]] <- pred
+  # Keep the folds next to the results of this cycle.
+  T19_5_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T19_5_gblup_variances_all, file = "T19_5_gblup_variances_all_4001_5000.rds")
+saveRDS(T19_5_gblup_prediction_all, file = "T19_5_gblup_prediction_all_4001_5000.rds")
+saveRDS(T19_5_gfblup_variances_all, file = "T19_5_gfblup_variances_all_4001_5000.rds")
+saveRDS(T19_5_gfblup_prediction_all, file = "T19_5_gfblup_prediction_all_4001_5000.rds")
+saveRDS(T19_5_gfblup_validate_all, file = "T19_5_gfblup_validate_all_4001_5000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T19_5_5001_6000.R b/code/using_GO/pla/T19_5_5001_6000.R
new file mode 100644
index 0000000000000000000000000000000000000000..3c1a372e0fd25b5f3115040d4dbacc2af2def03f
--- /dev/null
+++ b/code/using_GO/pla/T19_5_5001_6000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function(x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether `mypkg` is listed by installed.packages()
+is.installed <- function(mypkg) {
+  installed_names <- installed.packages()[, 1]
+  is.element(mypkg, installed_names)
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+#  install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+#  install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+#  options(devtools.install.args=" --no-multiargs")
+#  devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
+go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
+setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
+rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
+nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
+nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
+GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
+rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
+########################################################################################
+# filter: keep GO terms with at least one marker, then take this script's slice
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+GF_filtered <- GF_filtered[5001:6000]
+rGF_filtered <- rGF_filtered[5001:6000]
+
+###########################################################################################
+cycles <- 10  # one stored GBLUP fold set is read per cycle
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T19_5_gblup_variances_all <- rep(list(list()), cycles)
+T19_5_gblup_prediction_all <- rep(list(list()), cycles)
+T19_5_gfblup_variances_all <- rep(list(list()), cycles)
+T19_5_gfblup_prediction_all <- rep(list(list()), cycles)
+T19_5_gfblup_validate_all <- rep(list(list()), cycles)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Validation folds are rebuilt from the stored GBLUP fold set for cycle r,
+  # so no random fold assignment is drawn in this script.
+  validate <- matrix(unlist(gblup_validate[[r]]),
+                     round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message("T19_5...")
+  y <- pheno_df_pla$T19_5
+  # NOTE(review): the response also appears on the right-hand side here;
+  # an intercept-only design (y ~ 1) may have been intended -- confirm.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Store a cycle only when both fits succeed; the handler returns the empty
+  # placeholders as a value because assignments made inside it would be local.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+         pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+  T19_5_gblup_variances_all[[r]] <- gblup_fit$var
+  T19_5_gblup_prediction_all[[r]] <- gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One slot per GO term in this slice, pre-filled with empty lists.
+  var <- rep(list(list()), length(GF_filtered))
+  pred <- rep(list(list()), length(GF_filtered))
+  names(var) <- names(GF_filtered)
+  names(pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    # Fit inside the tryCatch expression, which runs in this scope, so a failure
+    # leaves the unfilled slot(s) as empty lists; the handler only logs it.
+    tryCatch({
+      var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
+      pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  T19_5_gfblup_variances_all[[r]] <- var
+  T19_5_gfblup_prediction_all[[r]] <- pred
+  # Keep the folds next to the results of this cycle.
+  T19_5_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T19_5_gblup_variances_all, file = "T19_5_gblup_variances_all_5001_6000.rds")
+saveRDS(T19_5_gblup_prediction_all, file = "T19_5_gblup_prediction_all_5001_6000.rds")
+saveRDS(T19_5_gfblup_variances_all, file = "T19_5_gfblup_variances_all_5001_6000.rds")
+saveRDS(T19_5_gfblup_prediction_all, file = "T19_5_gfblup_prediction_all_5001_6000.rds")
+saveRDS(T19_5_gfblup_validate_all, file = "T19_5_gfblup_validate_all_5001_6000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T19_5_6001_7297.R b/code/using_GO/pla/T19_5_6001_7297.R
new file mode 100644
index 0000000000000000000000000000000000000000..f6970dbf1cbc8d79ec997d3c248366387c171167
--- /dev/null
+++ b/code/using_GO/pla/T19_5_6001_7297.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function(x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether `mypkg` is listed by installed.packages()
+is.installed <- function(mypkg) {
+  installed_names <- installed.packages()[, 1]
+  is.element(mypkg, installed_names)
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+#  install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+#  install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+#  options(devtools.install.args=" --no-multiargs")
+#  devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
+go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
+setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
+rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
+nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
+nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
+GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
+rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
+########################################################################################
+# filter: keep GO terms with at least one marker, then take this script's slice
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+GF_filtered <- GF_filtered[6001:7297]
+rGF_filtered <- rGF_filtered[6001:7297]
+
+###########################################################################################
+cycles <- 10  # one stored GBLUP fold set is read per cycle
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T19_5_gblup_variances_all <- rep(list(list()), cycles)
+T19_5_gblup_prediction_all <- rep(list(list()), cycles)
+T19_5_gfblup_variances_all <- rep(list(list()), cycles)
+T19_5_gfblup_prediction_all <- rep(list(list()), cycles)
+T19_5_gfblup_validate_all <- rep(list(list()), cycles)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Validation folds are rebuilt from the stored GBLUP fold set for cycle r,
+  # so no random fold assignment is drawn in this script.
+  validate <- matrix(unlist(gblup_validate[[r]]),
+                     round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message("T19_5...")
+  y <- pheno_df_pla$T19_5
+  # NOTE(review): the response also appears on the right-hand side here;
+  # an intercept-only design (y ~ 1) may have been intended -- confirm.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Store a cycle only when both fits succeed; the handler returns the empty
+  # placeholders as a value because assignments made inside it would be local.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+         pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+  T19_5_gblup_variances_all[[r]] <- gblup_fit$var
+  T19_5_gblup_prediction_all[[r]] <- gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One slot per GO term in this slice, pre-filled with empty lists.
+  var <- rep(list(list()), length(GF_filtered))
+  pred <- rep(list(list()), length(GF_filtered))
+  names(var) <- names(GF_filtered)
+  names(pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    # Fit inside the tryCatch expression, which runs in this scope, so a failure
+    # leaves the unfilled slot(s) as empty lists; the handler only logs it.
+    tryCatch({
+      var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
+      pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  T19_5_gfblup_variances_all[[r]] <- var
+  T19_5_gfblup_prediction_all[[r]] <- pred
+  # Keep the folds next to the results of this cycle.
+  T19_5_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T19_5_gblup_variances_all, file = "T19_5_gblup_variances_all_6001_7297.rds")
+saveRDS(T19_5_gblup_prediction_all, file = "T19_5_gblup_prediction_all_6001_7297.rds")
+saveRDS(T19_5_gfblup_variances_all, file = "T19_5_gfblup_variances_all_6001_7297.rds")
+saveRDS(T19_5_gfblup_prediction_all, file = "T19_5_gfblup_prediction_all_6001_7297.rds")
+saveRDS(T19_5_gfblup_validate_all, file = "T19_5_gfblup_validate_all_6001_7297.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T19_6_1001_2000.R b/code/using_GO/pla/T19_6_1001_2000.R
new file mode 100644
index 0000000000000000000000000000000000000000..0b6396ceb5b1c8318e1fa9c31c30acdb467863fb
--- /dev/null
+++ b/code/using_GO/pla/T19_6_1001_2000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function(x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether `mypkg` is listed by installed.packages()
+is.installed <- function(mypkg) {
+  installed_names <- installed.packages()[, 1]
+  is.element(mypkg, installed_names)
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+#  install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+#  install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+#  options(devtools.install.args=" --no-multiargs")
+#  devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
+go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
+setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
+rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
+nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
+nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
+GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
+rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
+########################################################################################
+# filter: keep GO terms with at least one marker, then take this script's slice
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+GF_filtered <- GF_filtered[1001:2000]
+rGF_filtered <- rGF_filtered[1001:2000]
+
+###########################################################################################
+cycles <- 10  # one stored GBLUP fold set is read per cycle
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T19_6_gblup_variances_all <- rep(list(list()), cycles)
+T19_6_gblup_prediction_all <- rep(list(list()), cycles)
+T19_6_gfblup_variances_all <- rep(list(list()), cycles)
+T19_6_gfblup_prediction_all <- rep(list(list()), cycles)
+T19_6_gfblup_validate_all <- rep(list(list()), cycles)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Validation folds are rebuilt from the stored GBLUP fold set for cycle r,
+  # so no random fold assignment is drawn in this script.
+  validate <- matrix(unlist(gblup_validate[[r]]),
+                     round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message("T19_6...")
+  y <- pheno_df_pla$T19_6
+  # NOTE(review): the response also appears on the right-hand side here;
+  # an intercept-only design (y ~ 1) may have been intended -- confirm.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Store a cycle only when both fits succeed; the handler returns the empty
+  # placeholders as a value because assignments made inside it would be local.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+         pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+  T19_6_gblup_variances_all[[r]] <- gblup_fit$var
+  T19_6_gblup_prediction_all[[r]] <- gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One slot per GO term in this slice, pre-filled with empty lists.
+  var <- rep(list(list()), length(GF_filtered))
+  pred <- rep(list(list()), length(GF_filtered))
+  names(var) <- names(GF_filtered)
+  names(pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    # Fit inside the tryCatch expression, which runs in this scope, so a failure
+    # leaves the unfilled slot(s) as empty lists; the handler only logs it.
+    tryCatch({
+      var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
+      pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  T19_6_gfblup_variances_all[[r]] <- var
+  T19_6_gfblup_prediction_all[[r]] <- pred
+  # Keep the folds next to the results of this cycle.
+  T19_6_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T19_6_gblup_variances_all, file = "T19_6_gblup_variances_all_1001_2000.rds")
+saveRDS(T19_6_gblup_prediction_all, file = "T19_6_gblup_prediction_all_1001_2000.rds")
+saveRDS(T19_6_gfblup_variances_all, file = "T19_6_gfblup_variances_all_1001_2000.rds")
+saveRDS(T19_6_gfblup_prediction_all, file = "T19_6_gfblup_prediction_all_1001_2000.rds")
+saveRDS(T19_6_gfblup_validate_all, file = "T19_6_gfblup_validate_all_1001_2000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T19_6_1_1000.R b/code/using_GO/pla/T19_6_1_1000.R
new file mode 100644
index 0000000000000000000000000000000000000000..4962934c14873d393da1487e42f55ecf6556f3e7
--- /dev/null
+++ b/code/using_GO/pla/T19_6_1_1000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function(x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether `mypkg` is listed by installed.packages()
+is.installed <- function(mypkg) {
+  installed_names <- installed.packages()[, 1]
+  is.element(mypkg, installed_names)
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+#  install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+#  install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+#  options(devtools.install.args=" --no-multiargs")
+#  devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
+go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
+go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
+setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
+rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
+nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
+nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
+GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
+rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
+########################################################################################
+# filter: keep GO terms with at least one marker, then take this script's slice
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+GF_filtered <- GF_filtered[1:1000]
+rGF_filtered <- rGF_filtered[1:1000]
+
+###########################################################################################
+cycles <- 10  # one stored GBLUP fold set is read per cycle
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T19_6_gblup_variances_all <- rep(list(list()), cycles)
+T19_6_gblup_prediction_all <- rep(list(list()), cycles)
+T19_6_gfblup_variances_all <- rep(list(list()), cycles)
+T19_6_gfblup_prediction_all <- rep(list(list()), cycles)
+T19_6_gfblup_validate_all <- rep(list(list()), cycles)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Validation folds are rebuilt from the stored GBLUP fold set for cycle r,
+  # so no random fold assignment is drawn in this script.
+  validate <- matrix(unlist(gblup_validate[[r]]),
+                     round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message("T19_6...")
+  y <- pheno_df_pla$T19_6
+  # NOTE(review): the response also appears on the right-hand side here;
+  # an intercept-only design (y ~ 1) may have been intended -- confirm.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Store a cycle only when both fits succeed; the handler returns the empty
+  # placeholders as a value because assignments made inside it would be local.
+  gblup_fit <- tryCatch({
+    list(var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+         pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1))
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+  T19_6_gblup_variances_all[[r]] <- gblup_fit$var
+  T19_6_gblup_prediction_all[[r]] <- gblup_fit$pred
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One slot per GO term in this slice, pre-filled with empty lists.
+  var <- rep(list(list()), length(GF_filtered))
+  pred <- rep(list(list()), length(GF_filtered))
+  names(var) <- names(GF_filtered)
+  names(pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    # Fit inside the tryCatch expression, which runs in this scope, so a failure
+    # leaves the unfilled slot(s) as empty lists; the handler only logs it.
+    tryCatch({
+      var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
+      pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  T19_6_gfblup_variances_all[[r]] <- var
+  T19_6_gfblup_prediction_all[[r]] <- pred
+  # Keep the folds next to the results of this cycle.
+  T19_6_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T19_6_gblup_variances_all, file = "T19_6_gblup_variances_all_1_1000.rds")
+saveRDS(T19_6_gblup_prediction_all, file = "T19_6_gblup_prediction_all_1_1000.rds")
+saveRDS(T19_6_gfblup_variances_all, file = "T19_6_gfblup_variances_all_1_1000.rds")
+saveRDS(T19_6_gfblup_prediction_all, file = "T19_6_gfblup_prediction_all_1_1000.rds")
+saveRDS(T19_6_gfblup_validate_all, file = "T19_6_gfblup_validate_all_1_1000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T19_6_2001_3000.R b/code/using_GO/pla/T19_6_2001_3000.R
new file mode 100644
index 0000000000000000000000000000000000000000..e89602d8da75f5732d716770d73c7246c3ad7245
--- /dev/null
+++ b/code/using_GO/pla/T19_6_2001_3000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function(x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether `mypkg` is listed by installed.packages()
+is.installed <- function(mypkg) {
+  installed_names <- installed.packages()[, 1]
+  is.element(mypkg, installed_names)
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+#  install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+#  install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+#  options(devtools.install.args=" --no-multiargs")
+#  devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers 
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+# Keep GO terms with at least one marker, then take this script's slice of them.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+GF_filtered <- GF_filtered[2001:3000]
+rGF_filtered <- rGF_filtered[2001:3000]
+
+###########################################################################################
+# Per cycle: fit GBLUP (GRM = G) and, for every GO term in this slice, GFBLUP
+# (GRM = GF_filtered[k] plus rGF_filtered[k]), each once without and once with the
+# cycle's validation sets. Results are collected per cycle and saved as .rds files.
+cycles <- 10
+# Validation sets are read from file (one list entry per cycle), not re-sampled here.
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T19_6_gblup_variances_all <- rep(list(list()), cycles)
+T19_6_gblup_prediction_all <- rep(list(list()), cycles)
+T19_6_gfblup_variances_all <- rep(list(list()), cycles)
+T19_6_gfblup_prediction_all <- rep(list(list()), cycles)
+T19_6_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and fixed-effects design matrix are the same in every cycle.
+y <- pheno_df_pla$T19_6
+# NOTE(review): the response y also appears on the right-hand side of this formula;
+# confirm that model.matrix() returns the intended design matrix.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+n <- length(y)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Reshape the ids stored for cycle r into a matrix with round(nrow / n_folds) rows.
+  validate <- matrix(unlist(gblup_validate[[r]]),
+                     round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  message("T19_6...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits must succeed before either result is stored; on failure the pre-filled
+  # empty list() for this cycle is kept and the error is reported instead of hidden.
+  tryCatch({
+    gblup_fit <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
+    gblup_cv <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    T19_6_gblup_variances_all[[r]] <- gblup_fit
+    T19_6_gblup_prediction_all[[r]] <- gblup_cv
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  })
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Empty list() placeholders remain for GO terms whose fit fails.
+  gf_var <- rep(list(list()), length(GF_filtered))
+  gf_pred <- rep(list(list()), length(GF_filtered))
+  names(gf_var) <- names(GF_filtered)
+  names(gf_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    tryCatch({
+      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
+      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  T19_6_gfblup_variances_all[[r]] <- gf_var
+  T19_6_gfblup_prediction_all[[r]] <- gf_pred
+  T19_6_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T19_6_gblup_variances_all,"T19_6_gblup_variances_all_2001_3000.rds")
+saveRDS(T19_6_gblup_prediction_all,"T19_6_gblup_prediction_all_2001_3000.rds")
+saveRDS(T19_6_gfblup_variances_all,"T19_6_gfblup_variances_all_2001_3000.rds")
+saveRDS(T19_6_gfblup_prediction_all,"T19_6_gfblup_prediction_all_2001_3000.rds")
+saveRDS(T19_6_gfblup_validate_all,"T19_6_gfblup_validate_all_2001_3000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T19_6_3001_4000.R
b/code/using_GO/pla/T19_6_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..6abad1e41c4ec7a334f2b9091c9560620c97013b --- /dev/null +++ b/code/using_GO/pla/T19_6_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] + 
+###########################################################################################
+# Per cycle: fit GBLUP (GRM = G) and, for every GO term in this slice, GFBLUP
+# (GRM = GF_filtered[k] plus rGF_filtered[k]), each once without and once with the
+# cycle's validation sets. Results are collected per cycle and saved as .rds files.
+cycles <- 10
+# Validation sets are read from file (one list entry per cycle), not re-sampled here.
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T19_6_gblup_variances_all <- rep(list(list()), cycles)
+T19_6_gblup_prediction_all <- rep(list(list()), cycles)
+T19_6_gfblup_variances_all <- rep(list(list()), cycles)
+T19_6_gfblup_prediction_all <- rep(list(list()), cycles)
+T19_6_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and fixed-effects design matrix are the same in every cycle.
+y <- pheno_df_pla$T19_6
+# NOTE(review): the response y also appears on the right-hand side of this formula;
+# confirm that model.matrix() returns the intended design matrix.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+n <- length(y)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Reshape the ids stored for cycle r into a matrix with round(nrow / n_folds) rows.
+  validate <- matrix(unlist(gblup_validate[[r]]),
+                     round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  message("T19_6...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits must succeed before either result is stored; on failure the pre-filled
+  # empty list() for this cycle is kept and the error is reported instead of hidden.
+  tryCatch({
+    gblup_fit <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
+    gblup_cv <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    T19_6_gblup_variances_all[[r]] <- gblup_fit
+    T19_6_gblup_prediction_all[[r]] <- gblup_cv
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  })
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Empty list() placeholders remain for GO terms whose fit fails.
+  gf_var <- rep(list(list()), length(GF_filtered))
+  gf_pred <- rep(list(list()), length(GF_filtered))
+  names(gf_var) <- names(GF_filtered)
+  names(gf_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    tryCatch({
+      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
+      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  T19_6_gfblup_variances_all[[r]] <- gf_var
+  T19_6_gfblup_prediction_all[[r]] <- gf_pred
+  T19_6_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T19_6_gblup_variances_all,"T19_6_gblup_variances_all_3001_4000.rds")
+saveRDS(T19_6_gblup_prediction_all,"T19_6_gblup_prediction_all_3001_4000.rds")
+saveRDS(T19_6_gfblup_variances_all,"T19_6_gfblup_variances_all_3001_4000.rds")
+saveRDS(T19_6_gfblup_prediction_all,"T19_6_gfblup_prediction_all_3001_4000.rds")
+saveRDS(T19_6_gfblup_validate_all,"T19_6_gfblup_validate_all_3001_4000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T19_6_4001_5000.R b/code/using_GO/pla/T19_6_4001_5000.R
new file mode 100644
index 0000000000000000000000000000000000000000..de6adc40d9a62e0d5bd7e55806401f20947c24f9
--- /dev/null
+++ b/code/using_GO/pla/T19_6_4001_5000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+ is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers 
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+# Keep GO terms with at least one marker, then take this script's slice of them.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+GF_filtered <- GF_filtered[4001:5000]
+rGF_filtered <- rGF_filtered[4001:5000]
+
+###########################################################################################
+# Per cycle: fit GBLUP (GRM = G) and, for every GO term in this slice, GFBLUP
+# (GRM = GF_filtered[k] plus rGF_filtered[k]), each once without and once with the
+# cycle's validation sets. Results are collected per cycle and saved as .rds files.
+cycles <- 10
+# Validation sets are read from file (one list entry per cycle), not re-sampled here.
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T19_6_gblup_variances_all <- rep(list(list()), cycles)
+T19_6_gblup_prediction_all <- rep(list(list()), cycles)
+T19_6_gfblup_variances_all <- rep(list(list()), cycles)
+T19_6_gfblup_prediction_all <- rep(list(list()), cycles)
+T19_6_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and fixed-effects design matrix are the same in every cycle.
+y <- pheno_df_pla$T19_6
+# NOTE(review): the response y also appears on the right-hand side of this formula;
+# confirm that model.matrix() returns the intended design matrix.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+n <- length(y)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Reshape the ids stored for cycle r into a matrix with round(nrow / n_folds) rows.
+  validate <- matrix(unlist(gblup_validate[[r]]),
+                     round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  message("T19_6...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits must succeed before either result is stored; on failure the pre-filled
+  # empty list() for this cycle is kept and the error is reported instead of hidden.
+  tryCatch({
+    gblup_fit <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
+    gblup_cv <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    T19_6_gblup_variances_all[[r]] <- gblup_fit
+    T19_6_gblup_prediction_all[[r]] <- gblup_cv
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  })
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Empty list() placeholders remain for GO terms whose fit fails.
+  gf_var <- rep(list(list()), length(GF_filtered))
+  gf_pred <- rep(list(list()), length(GF_filtered))
+  names(gf_var) <- names(GF_filtered)
+  names(gf_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    tryCatch({
+      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
+      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  T19_6_gfblup_variances_all[[r]] <- gf_var
+  T19_6_gfblup_prediction_all[[r]] <- gf_pred
+  T19_6_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T19_6_gblup_variances_all,"T19_6_gblup_variances_all_4001_5000.rds")
+saveRDS(T19_6_gblup_prediction_all,"T19_6_gblup_prediction_all_4001_5000.rds")
+saveRDS(T19_6_gfblup_variances_all,"T19_6_gfblup_variances_all_4001_5000.rds") +saveRDS(T19_6_gfblup_prediction_all,"T19_6_gfblup_prediction_all_4001_5000.rds") +saveRDS(T19_6_gfblup_validate_all,"T19_6_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_6_5001_6000.R b/code/using_GO/pla/T19_6_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..9af25ae712cfb5423596b89dec19fba8a67d7683 --- /dev/null +++ b/code/using_GO/pla/T19_6_5001_6000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") 
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") 
#7297 excluding those go terms with zero markers
+########################################################################################
+# Keep GO terms with at least one marker, then take this script's slice of them.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+GF_filtered <- GF_filtered[5001:6000]
+rGF_filtered <- rGF_filtered[5001:6000]
+
+###########################################################################################
+# Per cycle: fit GBLUP (GRM = G) and, for every GO term in this slice, GFBLUP
+# (GRM = GF_filtered[k] plus rGF_filtered[k]), each once without and once with the
+# cycle's validation sets. Results are collected per cycle and saved as .rds files.
+cycles <- 10
+# Validation sets are read from file (one list entry per cycle), not re-sampled here.
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T19_6_gblup_variances_all <- rep(list(list()), cycles)
+T19_6_gblup_prediction_all <- rep(list(list()), cycles)
+T19_6_gfblup_variances_all <- rep(list(list()), cycles)
+T19_6_gfblup_prediction_all <- rep(list(list()), cycles)
+T19_6_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Phenotype and fixed-effects design matrix are the same in every cycle.
+y <- pheno_df_pla$T19_6
+# NOTE(review): the response y also appears on the right-hand side of this formula;
+# confirm that model.matrix() returns the intended design matrix.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+n <- length(y)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Reshape the ids stored for cycle r into a matrix with round(nrow / n_folds) rows.
+  validate <- matrix(unlist(gblup_validate[[r]]),
+                     round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
+  message("T19_6...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits must succeed before either result is stored; on failure the pre-filled
+  # empty list() for this cycle is kept and the error is reported instead of hidden.
+  tryCatch({
+    gblup_fit <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
+    gblup_cv <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    T19_6_gblup_variances_all[[r]] <- gblup_fit
+    T19_6_gblup_prediction_all[[r]] <- gblup_cv
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  })
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Empty list() placeholders remain for GO terms whose fit fails.
+  gf_var <- rep(list(list()), length(GF_filtered))
+  gf_pred <- rep(list(list()), length(GF_filtered))
+  names(gf_var) <- names(GF_filtered)
+  names(gf_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    tryCatch({
+      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
+      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+    }, error = function(e) {
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  T19_6_gfblup_variances_all[[r]] <- gf_var
+  T19_6_gfblup_prediction_all[[r]] <- gf_pred
+  T19_6_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T19_6_gblup_variances_all,"T19_6_gblup_variances_all_5001_6000.rds")
+saveRDS(T19_6_gblup_prediction_all,"T19_6_gblup_prediction_all_5001_6000.rds")
+saveRDS(T19_6_gfblup_variances_all,"T19_6_gfblup_variances_all_5001_6000.rds")
+saveRDS(T19_6_gfblup_prediction_all,"T19_6_gfblup_prediction_all_5001_6000.rds")
+saveRDS(T19_6_gfblup_validate_all,"T19_6_gfblup_validate_all_5001_6000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T19_6_6001_7297.R b/code/using_GO/pla/T19_6_6001_7297.R
new file mode 100644
index 0000000000000000000000000000000000000000..06492fbb080c9a8e6c0787fc78098a97b648cc34
--- /dev/null
+++ b/code/using_GO/pla/T19_6_6001_7297.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <-
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") 
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[6001:7297] +rGF_filtered<-rGF_filtered[6001:7297] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + 
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_6_gblup_variances_all=rep(list(list()),cycles) +T19_6_gblup_prediction_all=rep(list(list()),cycles) +T19_6_gfblup_variances_all=rep(list(list()),cycles) +T19_6_gfblup_prediction_all=rep(list(list()),cycles) +T19_6_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_6...") + y=pheno_df_pla$T19_6 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_6_gblup_variances_all[[r]]<-var + T19_6_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_6_gblup_variances_all[[r]]<-list() + T19_6_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_6_gfblup_variances_all[[r]]<-var + T19_6_gfblup_prediction_all[[r]]<-pred + T19_6_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_6_gblup_variances_all,"T19_6_gblup_variances_all_6001_7297.rds") +saveRDS(T19_6_gblup_prediction_all,"T19_6_gblup_prediction_all_6001_7297.rds") +saveRDS(T19_6_gfblup_variances_all,"T19_6_gfblup_variances_all_6001_7297.rds") +saveRDS(T19_6_gfblup_prediction_all,"T19_6_gfblup_prediction_all_6001_7297.rds") +saveRDS(T19_6_gfblup_validate_all,"T19_6_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_7_1001_2000.R b/code/using_GO/pla/T19_7_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..c8d6c1079382cf341edf2a2cf26284f1ad1d2b05 --- /dev/null +++ b/code/using_GO/pla/T19_7_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# 
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
#7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_7_gblup_variances_all=rep(list(list()),cycles) +T19_7_gblup_prediction_all=rep(list(list()),cycles) +T19_7_gfblup_variances_all=rep(list(list()),cycles) +T19_7_gfblup_prediction_all=rep(list(list()),cycles) +T19_7_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + 
#validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_7...") + y=pheno_df_pla$T19_7 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_7_gblup_variances_all[[r]]<-var + T19_7_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_7_gblup_variances_all[[r]]<-list() + T19_7_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_7_gfblup_variances_all[[r]]<-var + T19_7_gfblup_prediction_all[[r]]<-pred + T19_7_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_7_gblup_variances_all,"T19_7_gblup_variances_all_1001_2000.rds") +saveRDS(T19_7_gblup_prediction_all,"T19_7_gblup_prediction_all_1001_2000.rds") +saveRDS(T19_7_gfblup_variances_all,"T19_7_gfblup_variances_all_1001_2000.rds") +saveRDS(T19_7_gfblup_prediction_all,"T19_7_gfblup_prediction_all_1001_2000.rds") 
+saveRDS(T19_7_gfblup_validate_all,"T19_7_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_7_1_1000.R b/code/using_GO/pla/T19_7_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..450a7eff1b5451aa50fce66128896a1797a4d483 --- /dev/null +++ b/code/using_GO/pla/T19_7_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") 
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) 
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_7_gblup_variances_all=rep(list(list()),cycles) +T19_7_gblup_prediction_all=rep(list(list()),cycles) +T19_7_gfblup_variances_all=rep(list(list()),cycles) +T19_7_gfblup_prediction_all=rep(list(list()),cycles) +T19_7_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_7...") + y=pheno_df_pla$T19_7 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_7_gblup_variances_all[[r]]<-var + T19_7_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_7_gblup_variances_all[[r]]<-list() + T19_7_gblup_prediction_all[[r]]<-list() + }) #try catch ends + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_7_gfblup_variances_all[[r]]<-var + T19_7_gfblup_prediction_all[[r]]<-pred + T19_7_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_7_gblup_variances_all,"T19_7_gblup_variances_all_1_1000.rds") +saveRDS(T19_7_gblup_prediction_all,"T19_7_gblup_prediction_all_1_1000.rds") +saveRDS(T19_7_gfblup_variances_all,"T19_7_gfblup_variances_all_1_1000.rds") +saveRDS(T19_7_gfblup_prediction_all,"T19_7_gfblup_prediction_all_1_1000.rds") +saveRDS(T19_7_gfblup_validate_all,"T19_7_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_7_2001_3000.R b/code/using_GO/pla/T19_7_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..7cb6b035793a1573cdd0dfcf6ad1ecbfb77cb785 --- /dev/null +++ b/code/using_GO/pla/T19_7_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, 
installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_7_gblup_variances_all=rep(list(list()),cycles) +T19_7_gblup_prediction_all=rep(list(list()),cycles) +T19_7_gfblup_variances_all=rep(list(list()),cycles) 
+T19_7_gfblup_prediction_all=rep(list(list()),cycles) +T19_7_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T19_7...") + y=pheno_df_pla$T19_7 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T19_7_gblup_variances_all[[r]]<-var + T19_7_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T19_7_gblup_variances_all[[r]]<-list() + T19_7_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T19_7_gfblup_variances_all[[r]]<-var + T19_7_gfblup_prediction_all[[r]]<-pred + 
T19_7_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T19_7_gblup_variances_all,"T19_7_gblup_variances_all_2001_3000.rds") +saveRDS(T19_7_gblup_prediction_all,"T19_7_gblup_prediction_all_2001_3000.rds") +saveRDS(T19_7_gfblup_variances_all,"T19_7_gfblup_variances_all_2001_3000.rds") +saveRDS(T19_7_gfblup_prediction_all,"T19_7_gfblup_prediction_all_2001_3000.rds") +saveRDS(T19_7_gfblup_validate_all,"T19_7_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_7_3001_4000.R b/code/using_GO/pla/T19_7_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..bffea958b6da431f0812d3b3b676d7cb5e9803b1 --- /dev/null +++ b/code/using_GO/pla/T19_7_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") 
# ---- Input data (trait group "pla") ----
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores whatever objects the .Rdata file holds; the code below relies on `G`
# and `pheno_df_pla` being among them.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO-term priors ----
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
# The remaining objects hold 7297 entries: GO terms with zero markers are excluded.
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# filter: keep GO terms that have at least one marker, then take this script's slice
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]

###########################################################################################
cycles <- 10
# Cross-validation folds saved by the plain GBLUP run; read back here rather than resampled.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot keeps its empty-list placeholder when that cycle's fit fails.
T19_7_gblup_variances_all <- rep(list(list()), cycles)
T19_7_gblup_prediction_all <- rep(list(list()), cycles)
T19_7_gfblup_variances_all <- rep(list(list()), cycles)
T19_7_gfblup_prediction_all <- rep(list(list()), cycles)
T19_7_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design do not change between cycles, so they are built once.
# NOTE(review): `y ~ 1*y` names the response on the right-hand side too; if an
# intercept-only design is intended, `y ~ 1` would say so directly -- confirm before changing.
y <- pheno_df_pla$T19_7
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

# Report a failed greml() fit instead of discarding the error silently.
report_greml_failure <- function(what, e) {
  message("greml failed for ", what, ": ", conditionMessage(e))
}

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): if gblup_validate[[r]] holds a fold matrix, unlist() flattens it
  # column-wise while byrow = TRUE refills row-wise -- confirm the columns still match
  # the GBLUP folds.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  message("T19_7...")

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed for either one to be stored.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      report_greml_failure(paste0("GBLUP, cycle ", r), e)
      list(variances = list(), prediction = list())
    }
  )
  T19_7_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
  T19_7_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # A variance fit that succeeded is kept even when the prediction fit then fails.
    tryCatch(
      {
        gfblup_var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
        gfblup_pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
      },
      error = function(e) {
        report_greml_failure(paste0("GFBLUP, cycle ", r, ", GO# ", k, " (", names(GF_filtered)[k], ")"), e)
      }
    )
  }
  T19_7_gfblup_variances_all[[r]] <- gfblup_var
  T19_7_gfblup_prediction_all[[r]] <- gfblup_pred
  T19_7_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T19_7_gblup_variances_all, "T19_7_gblup_variances_all_3001_4000.rds")
saveRDS(T19_7_gblup_prediction_all, "T19_7_gblup_prediction_all_3001_4000.rds")
saveRDS(T19_7_gfblup_variances_all, "T19_7_gfblup_variances_all_3001_4000.rds")
saveRDS(T19_7_gfblup_prediction_all, "T19_7_gfblup_prediction_all_3001_4000.rds")
saveRDS(T19_7_gfblup_validate_all, "T19_7_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T19_7_4001_5000.R b/code/using_GO/pla/T19_7_4001_5000.R new
# file mode 100644 index 0000000000000000000000000000000000000000..f3ff4840a079ca1345b40f135826f5585643a7e3 --- /dev/null +++ b/code/using_GO/pla/T19_7_4001_5000.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Remove leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub(pattern = "^\\s+|\\s+$", replacement = "", x = x)
}

# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)

# Is each package named in `mypkg` present in the installed-package table?
is.installed <- function(mypkg) {
  installed_names <- installed.packages()[, 1]
  is.element(mypkg, installed_names)
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time setup, left disabled: install "backports" and "devtools" if missing, then
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")

message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO-term priors
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
# The remaining objects hold 7297 entries: GO terms with zero markers are excluded.
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# filter: keep GO terms that have at least one marker, then take this script's slice
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
kept_terms <- names(go_all_markers_filtered)
GF_filtered <- GF[kept_terms][4001:5000]
rGF_filtered <- rGF[kept_terms][4001:5000]

###########################################################################################
cycles <- 10
# Cross-validation folds saved by the plain GBLUP run; read back here rather than resampled.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot keeps its empty-list placeholder when that cycle's fit fails.
T19_7_gblup_variances_all <- rep(list(list()), cycles)
T19_7_gblup_prediction_all <- rep(list(list()), cycles)
T19_7_gfblup_variances_all <- rep(list(list()), cycles)
T19_7_gfblup_prediction_all <- rep(list(list()), cycles)
T19_7_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design do not change between cycles, so they are built once.
# NOTE(review): `y ~ 1*y` names the response on the right-hand side too; if an
# intercept-only design is intended, `y ~ 1` would say so directly -- confirm before changing.
y <- pheno_df_pla$T19_7
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

# Report a failed greml() fit instead of discarding the error silently.
report_greml_failure <- function(what, e) {
  message("greml failed for ", what, ": ", conditionMessage(e))
}

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): if gblup_validate[[r]] holds a fold matrix, unlist() flattens it
  # column-wise while byrow = TRUE refills row-wise -- confirm the columns still match
  # the GBLUP folds.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  message("T19_7...")

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed for either one to be stored.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      report_greml_failure(paste0("GBLUP, cycle ", r), e)
      list(variances = list(), prediction = list())
    }
  )
  T19_7_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
  T19_7_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # A variance fit that succeeded is kept even when the prediction fit then fails.
    tryCatch(
      {
        gfblup_var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
        gfblup_pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
      },
      error = function(e) {
        report_greml_failure(paste0("GFBLUP, cycle ", r, ", GO# ", k, " (", names(GF_filtered)[k], ")"), e)
      }
    )
  }
  T19_7_gfblup_variances_all[[r]] <- gfblup_var
  T19_7_gfblup_prediction_all[[r]] <- gfblup_pred
  T19_7_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T19_7_gblup_variances_all, "T19_7_gblup_variances_all_4001_5000.rds")
saveRDS(T19_7_gblup_prediction_all, "T19_7_gblup_prediction_all_4001_5000.rds")
saveRDS(T19_7_gfblup_variances_all, "T19_7_gfblup_variances_all_4001_5000.rds")
saveRDS(T19_7_gfblup_prediction_all, "T19_7_gfblup_prediction_all_4001_5000.rds")
saveRDS(T19_7_gfblup_validate_all, "T19_7_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T19_7_5001_6000.R b/code/using_GO/pla/T19_7_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..0d79810fe228a1158b7d596d42bacfc624c31b40 --- /dev/null +++ b/code/using_GO/pla/T19_7_5001_6000.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Remove leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub(pattern = "^\\s+|\\s+$", replacement = "", x = x)
}

# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)

# Is each package named in `mypkg` present in the installed-package table?
is.installed <- function(mypkg) {
  installed_names <- installed.packages()[, 1]
  is.element(mypkg, installed_names)
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time setup, left disabled: install "backports" and "devtools" if missing, then
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")

message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
# Each object below holds 7297 entries: GO terms with zero markers are excluded.
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# filter: keep GO terms that have at least one marker, then take this script's slice
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]

###########################################################################################
cycles <- 10
# Cross-validation folds saved by the plain GBLUP run; read back here rather than resampled.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot keeps its empty-list placeholder when that cycle's fit fails.
T19_7_gblup_variances_all <- rep(list(list()), cycles)
T19_7_gblup_prediction_all <- rep(list(list()), cycles)
T19_7_gfblup_variances_all <- rep(list(list()), cycles)
T19_7_gfblup_prediction_all <- rep(list(list()), cycles)
T19_7_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design do not change between cycles, so they are built once.
# NOTE(review): `y ~ 1*y` names the response on the right-hand side too; if an
# intercept-only design is intended, `y ~ 1` would say so directly -- confirm before changing.
y <- pheno_df_pla$T19_7
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

# Report a failed greml() fit instead of discarding the error silently.
report_greml_failure <- function(what, e) {
  message("greml failed for ", what, ": ", conditionMessage(e))
}

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): if gblup_validate[[r]] holds a fold matrix, unlist() flattens it
  # column-wise while byrow = TRUE refills row-wise -- confirm the columns still match
  # the GBLUP folds.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  message("T19_7...")

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed for either one to be stored.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      report_greml_failure(paste0("GBLUP, cycle ", r), e)
      list(variances = list(), prediction = list())
    }
  )
  T19_7_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
  T19_7_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # A variance fit that succeeded is kept even when the prediction fit then fails.
    tryCatch(
      {
        gfblup_var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
        gfblup_pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
      },
      error = function(e) {
        report_greml_failure(paste0("GFBLUP, cycle ", r, ", GO# ", k, " (", names(GF_filtered)[k], ")"), e)
      }
    )
  }
  T19_7_gfblup_variances_all[[r]] <- gfblup_var
  T19_7_gfblup_prediction_all[[r]] <- gfblup_pred
  T19_7_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T19_7_gblup_variances_all, "T19_7_gblup_variances_all_5001_6000.rds")
saveRDS(T19_7_gblup_prediction_all, "T19_7_gblup_prediction_all_5001_6000.rds")
# Remaining GFBLUP outputs of the 5001-6000 run.
saveRDS(object = T19_7_gfblup_variances_all, file = "T19_7_gfblup_variances_all_5001_6000.rds")
saveRDS(object = T19_7_gfblup_prediction_all, file = "T19_7_gfblup_prediction_all_5001_6000.rds")
saveRDS(object = T19_7_gfblup_validate_all, file = "T19_7_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T19_7_6001_7297.R b/code/using_GO/pla/T19_7_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..9d812e29e7c2d6990a64419280e9fddcec377dfb --- /dev/null +++ b/code/using_GO/pla/T19_7_6001_7297.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Remove leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub(pattern = "^\\s+|\\s+$", replacement = "", x = x)
}

# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)

# Is each package named in `mypkg` present in the installed-package table?
is.installed <- function(mypkg) {
  installed_names <- installed.packages()[, 1]
  is.element(mypkg, installed_names)
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time setup, left disabled: install "backports" and "devtools" if missing, then
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")

message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
# ---- Genotype / phenotype inputs ----
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores whatever objects each .Rdata file holds under their saved names.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO-term priors ----
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
# The remaining objects hold 7297 entries: GO terms with zero markers are excluded.
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
# 7297 excluding those go terms with zero markers
########################################################################################
# filter: keep GO terms that have at least one marker, then take this script's slice
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[6001:7297]
rGF_filtered <- rGF_filtered[6001:7297]

###########################################################################################
cycles <- 10
# Cross-validation folds saved by the plain GBLUP run; read back here rather than resampled.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot keeps its empty-list placeholder when that cycle's fit fails.
T19_7_gblup_variances_all <- rep(list(list()), cycles)
T19_7_gblup_prediction_all <- rep(list(list()), cycles)
T19_7_gfblup_variances_all <- rep(list(list()), cycles)
T19_7_gfblup_prediction_all <- rep(list(list()), cycles)
T19_7_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design do not change between cycles, so they are built once.
# NOTE(review): `y ~ 1*y` names the response on the right-hand side too; if an
# intercept-only design is intended, `y ~ 1` would say so directly -- confirm before changing.
y <- pheno_df_pla$T19_7
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

# Report a failed greml() fit instead of discarding the error silently.
report_greml_failure <- function(what, e) {
  message("greml failed for ", what, ": ", conditionMessage(e))
}

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): if gblup_validate[[r]] holds a fold matrix, unlist() flattens it
  # column-wise while byrow = TRUE refills row-wise -- confirm the columns still match
  # the GBLUP folds.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  message("T19_7...")

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed for either one to be stored.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      report_greml_failure(paste0("GBLUP, cycle ", r), e)
      list(variances = list(), prediction = list())
    }
  )
  T19_7_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
  T19_7_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # A variance fit that succeeded is kept even when the prediction fit then fails.
    tryCatch(
      {
        gfblup_var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
        gfblup_pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
      },
      error = function(e) {
        report_greml_failure(paste0("GFBLUP, cycle ", r, ", GO# ", k, " (", names(GF_filtered)[k], ")"), e)
      }
    )
  }
  T19_7_gfblup_variances_all[[r]] <- gfblup_var
  T19_7_gfblup_prediction_all[[r]] <- gfblup_pred
  T19_7_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T19_7_gblup_variances_all, "T19_7_gblup_variances_all_6001_7297.rds")
saveRDS(T19_7_gblup_prediction_all, "T19_7_gblup_prediction_all_6001_7297.rds")
saveRDS(T19_7_gfblup_variances_all, "T19_7_gfblup_variances_all_6001_7297.rds")
saveRDS(T19_7_gfblup_prediction_all, "T19_7_gfblup_prediction_all_6001_7297.rds")
saveRDS(T19_7_gfblup_validate_all, "T19_7_gfblup_validate_all_6001_7297.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T19_8_1001_2000.R b/code/using_GO/pla/T19_8_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..ea6b7916f5e09595fb6679f31cb858018c2e7063 --- /dev/null +++ b/code/using_GO/pla/T19_8_1001_2000.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Remove leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub(pattern = "^\\s+|\\s+$", replacement = "", x = x)
}

# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)

# Is each package named in `mypkg` present in the installed-package table?
is.installed <- function(mypkg) {
  installed_names <- installed.packages()[, 1]
  is.element(mypkg, installed_names)
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time setup, left disabled: install "backports" and "devtools" if missing, then
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")

message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")
# ---- GO-term priors ----
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
# The remaining objects hold 7297 entries: GO terms with zero markers are excluded.
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
########################################################################################
# filter: keep GO terms that have at least one marker, then take this script's slice
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
kept_terms <- names(go_all_markers_filtered)
GF_filtered <- GF[kept_terms][1001:2000]
rGF_filtered <- rGF[kept_terms][1001:2000]

###########################################################################################
# Number of repeated cross-validation cycles.
cycles <- 10
# Cross-validation folds saved by the plain GBLUP run; read back here rather than resampled.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot keeps its empty-list placeholder when that cycle's fit fails.
T19_8_gblup_variances_all <- rep(list(list()), cycles)
T19_8_gblup_prediction_all <- rep(list(list()), cycles)
T19_8_gfblup_variances_all <- rep(list(list()), cycles)
T19_8_gfblup_prediction_all <- rep(list(list()), cycles)
T19_8_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and fixed-effect design do not change between cycles, so they are built once.
# NOTE(review): `y ~ 1*y` names the response on the right-hand side too; if an
# intercept-only design is intended, `y ~ 1` would say so directly -- confirm before changing.
y <- pheno_df_pla$T19_8
fm <- y ~ 1*y
X <- model.matrix(fm)
n <- length(y)

# Report a failed greml() fit instead of discarding the error silently.
report_greml_failure <- function(what, e) {
  message("greml failed for ", what, ": ", conditionMessage(e))
}

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): if gblup_validate[[r]] holds a fold matrix, unlist() flattens it
  # column-wise while byrow = TRUE refills row-wise -- confirm the columns still match
  # the GBLUP folds.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df_pla) / n_folds), byrow = TRUE)
  message("T19_8...")

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed for either one to be stored.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      report_greml_failure(paste0("GBLUP, cycle ", r), e)
      list(variances = list(), prediction = list())
    }
  )
  T19_8_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
  T19_8_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # A variance fit that succeeded is kept even when the prediction fit then fails.
    tryCatch(
      {
        gfblup_var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
        gfblup_pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
      },
      error = function(e) {
        report_greml_failure(paste0("GFBLUP, cycle ", r, ", GO# ", k, " (", names(GF_filtered)[k], ")"), e)
      }
    )
  }
  T19_8_gfblup_variances_all[[r]] <- gfblup_var
  T19_8_gfblup_prediction_all[[r]] <- gfblup_pred
  T19_8_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T19_8_gblup_variances_all, "T19_8_gblup_variances_all_1001_2000.rds")
saveRDS(T19_8_gblup_prediction_all, "T19_8_gblup_prediction_all_1001_2000.rds")
saveRDS(T19_8_gfblup_variances_all, "T19_8_gfblup_variances_all_1001_2000.rds")
saveRDS(T19_8_gfblup_prediction_all, "T19_8_gfblup_prediction_all_1001_2000.rds")
saveRDS(T19_8_gfblup_validate_all, "T19_8_gfblup_validate_all_1001_2000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/pla/T19_8_1_1000.R b/code/using_GO/pla/T19_8_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..e545504da5e34922c2ad97ad3a0bc15099ef735a --- /dev/null +++ b/code/using_GO/pla/T19_8_1_1000.R @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Remove leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub(pattern = "^\\s+|\\s+$", replacement = "", x = x)
}

# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)

# Is each package named in `mypkg` present in the installed-package table?
is.installed <- function(mypkg) {
  installed_names <- installed.packages()[, 1]
  is.element(mypkg, installed_names)
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

# One-time setup, left disabled: install "backports" and "devtools" if missing, then
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
+#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero 
markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_8_gblup_variances_all=rep(list(list()),cycles) +T19_8_gblup_prediction_all=rep(list(list()),cycles) +T19_8_gfblup_variances_all=rep(list(list()),cycles) +T19_8_gfblup_prediction_all=rep(list(list()),cycles) +T19_8_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T19_8...")
+  y=pheno_df_pla$T19_8
+  fm <- y ~ 1*y  # NOTE(review): the response is on the RHS; presumably an intercept-only design (y ~ 1) is meant - confirm
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  result = tryCatch({
+    var <- greml(y = y, X = X, GRM = list(G=G),ncores=1)  # fit without validation sets
+    pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1)  # same model, with the validation sets
+    T19_8_gblup_variances_all[[r]]<-var
+    T19_8_gblup_prediction_all[[r]]<-pred
+  }, error = function(e) {
+    # Slot r still holds its pre-initialised list() here, so the failure only needs to be reported.
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  }) #try catch ends
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    # Per-set fit: GF_filtered[k] and rGF_filtered[k] are passed together as the GRM list.
+    result = tryCatch({
+      var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1)
+      pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1)
+    }, error = function(e) {
+      var[[k]] <<- list()   # `<<-` resets the script-level lists; plain `<-` only changed a copy inside the handler
+      pred[[k]] <<- list()
+      message("GFBLUP failed for GO# ", k, ": ", conditionMessage(e))
+    }) #try catch ends
+  }
+  T19_8_gfblup_variances_all[[r]]<-var
+  T19_8_gfblup_prediction_all[[r]]<-pred
+  T19_8_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T19_8_gblup_variances_all,"T19_8_gblup_variances_all_1_1000.rds")
+saveRDS(T19_8_gblup_prediction_all,"T19_8_gblup_prediction_all_1_1000.rds")
+saveRDS(T19_8_gfblup_variances_all,"T19_8_gfblup_variances_all_1_1000.rds")
+saveRDS(T19_8_gfblup_prediction_all,"T19_8_gfblup_prediction_all_1_1000.rds")
+saveRDS(T19_8_gfblup_validate_all,"T19_8_gfblup_validate_all_1_1000.rds")
+
+################################################################################################################################## diff --git a/code/using_GO/pla/T19_8_2001_3000.R b/code/using_GO/pla/T19_8_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..d87af76aa1c74fc86120f9156e98a219f14aa78c --- /dev/null +++ b/code/using_GO/pla/T19_8_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] 
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[2001:3000]
+rGF_filtered<-rGF_filtered[2001:3000]
+
+###########################################################################################
+cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
+ gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T19_8_gblup_variances_all=rep(list(list()),cycles)
+T19_8_gblup_prediction_all=rep(list(list()),cycles)
+T19_8_gfblup_variances_all=rep(list(list()),cycles)
+T19_8_gfblup_prediction_all=rep(list(list()),cycles)
+T19_8_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in 1:cycles)
+{
+  message(paste("cycle#",r,"..."))
+  a=as.numeric(Sys.time())
+  set.seed(a) #generate random folds sets everytime
+  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla)))
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds)
+  # The validation sets come from gblup_validate (read from gblup_validate_all.rds), not from
+  # folds_index, which is never used; the former per-fold fill loop had an empty body.
+  # NOTE(review): byrow=TRUE refills the unlisted ids row-wise; confirm this matches the
+  # layout in which gblup_validate_all.rds was written.
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T19_8...")
+  y=pheno_df_pla$T19_8
+  fm <- y ~ 1*y  # NOTE(review): the response is on the RHS; presumably an intercept-only design (y ~ 1) is meant - confirm
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  result = tryCatch({
+    var <- greml(y = y, X = X, GRM = list(G=G),ncores=1)  # fit without validation sets
+    pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1)  # same model, with the validation sets
+    T19_8_gblup_variances_all[[r]]<-var
+    T19_8_gblup_prediction_all[[r]]<-pred
+  }, error = function(e) {
+    # Slot r still holds its pre-initialised list() here, so the failure only needs to be reported.
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  }) #try catch ends
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    # Per-set fit: GF_filtered[k] and rGF_filtered[k] are passed together as the GRM list.
+    result = tryCatch({
+      var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1)
+      pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1)
+    }, error = function(e) {
+      var[[k]] <<- list()   # `<<-` resets the script-level lists; plain `<-` only changed a copy inside the handler
+      pred[[k]] <<- list()
+      message("GFBLUP failed for GO# ", k, ": ", conditionMessage(e))
+    }) #try catch ends
+  }
+  T19_8_gfblup_variances_all[[r]]<-var
+  T19_8_gfblup_prediction_all[[r]]<-pred
+  T19_8_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T19_8_gblup_variances_all,"T19_8_gblup_variances_all_2001_3000.rds")
+saveRDS(T19_8_gblup_prediction_all,"T19_8_gblup_prediction_all_2001_3000.rds")
+saveRDS(T19_8_gfblup_variances_all,"T19_8_gfblup_variances_all_2001_3000.rds")
+saveRDS(T19_8_gfblup_prediction_all,"T19_8_gfblup_prediction_all_2001_3000.rds")
+saveRDS(T19_8_gfblup_validate_all,"T19_8_gfblup_validate_all_2001_3000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T19_8_3001_4000.R b/code/using_GO/pla/T19_8_3001_4000.R
new file mode 100644
index 0000000000000000000000000000000000000000..9822cbc9db2e11b5b7e4735717fcab53d247e800
--- /dev/null
+++ b/code/using_GO/pla/T19_8_3001_4000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T19_8_gblup_variances_all=rep(list(list()),cycles) +T19_8_gblup_prediction_all=rep(list(list()),cycles) +T19_8_gfblup_variances_all=rep(list(list()),cycles) +T19_8_gfblup_prediction_all=rep(list(list()),cycles) +T19_8_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + 
message(paste("cycle#",r,"..."))
+  a=as.numeric(Sys.time())
+  set.seed(a) #generate random folds sets everytime
+  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla)))
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds)
+  # The validation sets come from gblup_validate (read from gblup_validate_all.rds), not from
+  # folds_index, which is never used; the former per-fold fill loop had an empty body.
+  # NOTE(review): byrow=TRUE refills the unlisted ids row-wise; confirm this matches the
+  # layout in which gblup_validate_all.rds was written.
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T19_8...")
+  y=pheno_df_pla$T19_8
+  fm <- y ~ 1*y  # NOTE(review): the response is on the RHS; presumably an intercept-only design (y ~ 1) is meant - confirm
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  result = tryCatch({
+    var <- greml(y = y, X = X, GRM = list(G=G),ncores=1)  # fit without validation sets
+    pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1)  # same model, with the validation sets
+    T19_8_gblup_variances_all[[r]]<-var
+    T19_8_gblup_prediction_all[[r]]<-pred
+  }, error = function(e) {
+    # Slot r still holds its pre-initialised list() here, so the failure only needs to be reported.
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  }) #try catch ends
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    # Per-set fit: GF_filtered[k] and rGF_filtered[k] are passed together as the GRM list.
+    result = tryCatch({
+      var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1)
+      pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1)
+    }, error = function(e) {
+      var[[k]] <<- list()   # `<<-` resets the script-level lists; plain `<-` only changed a copy inside the handler
+      pred[[k]] <<- list()
+      message("GFBLUP failed for GO# ", k, ": ", conditionMessage(e))
+    }) #try catch ends
+  }
+  T19_8_gfblup_variances_all[[r]]<-var
+  T19_8_gfblup_prediction_all[[r]]<-pred
+  T19_8_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T19_8_gblup_variances_all,"T19_8_gblup_variances_all_3001_4000.rds") +saveRDS(T19_8_gblup_prediction_all,"T19_8_gblup_prediction_all_3001_4000.rds") +saveRDS(T19_8_gfblup_variances_all,"T19_8_gfblup_variances_all_3001_4000.rds") +saveRDS(T19_8_gfblup_prediction_all,"T19_8_gfblup_prediction_all_3001_4000.rds") +saveRDS(T19_8_gfblup_validate_all,"T19_8_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T19_8_4001_5000.R b/code/using_GO/pla/T19_8_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..4a5b40aecbfe6606ea24503b978d3eb8f43c4222 --- /dev/null +++ b/code/using_GO/pla/T19_8_4001_5000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") 
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with 
zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+########################################################################################
+#filter
+n=length(go_all_genes_number[go_all_markers_number>0])
+go_all_markers_filtered<-go_all_markers[go_all_markers_number>0]
+GF_filtered<-GF[names(go_all_markers_filtered)]
+rGF_filtered<-rGF[names(go_all_markers_filtered)]
+GF_filtered<-GF_filtered[4001:5000]
+rGF_filtered<-rGF_filtered[4001:5000]
+
+###########################################################################################
+cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE);
+ gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T19_8_gblup_variances_all=rep(list(list()),cycles)
+T19_8_gblup_prediction_all=rep(list(list()),cycles)
+T19_8_gfblup_variances_all=rep(list(list()),cycles)
+T19_8_gfblup_prediction_all=rep(list(list()),cycles)
+T19_8_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in 1:cycles)
+{
+  message(paste("cycle#",r,"..."))
+  a=as.numeric(Sys.time())
+  set.seed(a) #generate random folds sets everytime
+  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla)))
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds)
+  # The validation sets come from gblup_validate (read from gblup_validate_all.rds), not from
+  # folds_index, which is never used; the former per-fold fill loop had an empty body.
+  # NOTE(review): byrow=TRUE refills the unlisted ids row-wise; confirm this matches the
+  # layout in which gblup_validate_all.rds was written.
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T19_8...")
+  y=pheno_df_pla$T19_8
+  fm <- y ~ 1*y  # NOTE(review): the response is on the RHS; presumably an intercept-only design (y ~ 1) is meant - confirm
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  result = tryCatch({
+    var <- greml(y = y, X = X, GRM = list(G=G),ncores=1)  # fit without validation sets
+    pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1)  # same model, with the validation sets
+    T19_8_gblup_variances_all[[r]]<-var
+    T19_8_gblup_prediction_all[[r]]<-pred
+  }, error = function(e) {
+    # Slot r still holds its pre-initialised list() here, so the failure only needs to be reported.
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  }) #try catch ends
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    # Per-set fit: GF_filtered[k] and rGF_filtered[k] are passed together as the GRM list.
+    result = tryCatch({
+      var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1)
+      pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1)
+    }, error = function(e) {
+      var[[k]] <<- list()   # `<<-` resets the script-level lists; plain `<-` only changed a copy inside the handler
+      pred[[k]] <<- list()
+      message("GFBLUP failed for GO# ", k, ": ", conditionMessage(e))
+    }) #try catch ends
+  }
+  T19_8_gfblup_variances_all[[r]]<-var
+  T19_8_gfblup_prediction_all[[r]]<-pred
+  T19_8_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T19_8_gblup_variances_all,"T19_8_gblup_variances_all_4001_5000.rds")
+saveRDS(T19_8_gblup_prediction_all,"T19_8_gblup_prediction_all_4001_5000.rds")
+saveRDS(T19_8_gfblup_variances_all,"T19_8_gfblup_variances_all_4001_5000.rds")
+saveRDS(T19_8_gfblup_prediction_all,"T19_8_gfblup_prediction_all_4001_5000.rds")
+saveRDS(T19_8_gfblup_validate_all,"T19_8_gfblup_validate_all_4001_5000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T19_8_5001_6000.R b/code/using_GO/pla/T19_8_5001_6000.R
new file mode 100644
index 0000000000000000000000000000000000000000..766d4f81b0fa5086fbf0ff4804dfff5a4b73c824
--- /dev/null
+++ b/code/using_GO/pla/T19_8_5001_6000.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") 
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + 
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+T19_8_gblup_variances_all=rep(list(list()),cycles)
+T19_8_gblup_prediction_all=rep(list(list()),cycles)
+T19_8_gfblup_variances_all=rep(list(list()),cycles)
+T19_8_gfblup_prediction_all=rep(list(list()),cycles)
+T19_8_gfblup_validate_all=rep(list(list()),cycles)
+
+for(r in 1:cycles)
+{
+  message(paste("cycle#",r,"..."))
+  a=as.numeric(Sys.time())
+  set.seed(a) #generate random folds sets everytime
+  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla)))
+  validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds)
+  # The validation sets come from gblup_validate (read from gblup_validate_all.rds), not from
+  # folds_index, which is never used; the former per-fold fill loop had an empty body.
+  # NOTE(review): byrow=TRUE refills the unlisted ids row-wise; confirm this matches the
+  # layout in which gblup_validate_all.rds was written.
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("T19_8...")
+  y=pheno_df_pla$T19_8
+  fm <- y ~ 1*y  # NOTE(review): the response is on the RHS; presumably an intercept-only design (y ~ 1) is meant - confirm
+  X <- model.matrix(fm)
+  n <- length(y)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  result = tryCatch({
+    var <- greml(y = y, X = X, GRM = list(G=G),ncores=1)  # fit without validation sets
+    pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1)  # same model, with the validation sets
+    T19_8_gblup_variances_all[[r]]<-var
+    T19_8_gblup_prediction_all[[r]]<-pred
+  }, error = function(e) {
+    # Slot r still holds its pre-initialised list() here, so the failure only needs to be reported.
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  }) #try catch ends
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  var <-rep(list(list()),length(GF_filtered))
+  pred <-rep(list(list()),length(GF_filtered))
+  names(var)<-(names(GF_filtered))
+  names(pred)<-(names(GF_filtered))
+  for(k in seq_along(GF_filtered))
+  {
+    message(paste("GO#",k,"..."))
+    # Per-set fit: GF_filtered[k] and rGF_filtered[k] are passed together as the GRM list.
+    result = tryCatch({
+      var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1)
+      pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1)
+    }, error = function(e) {
+      var[[k]] <<- list()   # `<<-` resets the script-level lists; plain `<-` only changed a copy inside the handler
+      pred[[k]] <<- list()
+      message("GFBLUP failed for GO# ", k, ": ", conditionMessage(e))
+    }) #try catch ends
+  }
+  T19_8_gfblup_variances_all[[r]]<-var
+  T19_8_gfblup_prediction_all[[r]]<-pred
+  T19_8_gfblup_validate_all[[r]]<-list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(T19_8_gblup_variances_all,"T19_8_gblup_variances_all_5001_6000.rds")
+saveRDS(T19_8_gblup_prediction_all,"T19_8_gblup_prediction_all_5001_6000.rds")
+saveRDS(T19_8_gfblup_variances_all,"T19_8_gfblup_variances_all_5001_6000.rds")
+saveRDS(T19_8_gfblup_prediction_all,"T19_8_gfblup_prediction_all_5001_6000.rds")
+saveRDS(T19_8_gfblup_validate_all,"T19_8_gfblup_validate_all_5001_6000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/pla/T19_8_6001_7297.R b/code/using_GO/pla/T19_8_6001_7297.R
new file mode 100644
index 0000000000000000000000000000000000000000..c699d860a2e2079b74edb4dabb954008a808e396
--- /dev/null
+++ b/code/using_GO/pla/T19_8_6001_7297.R
@@ -0,0 +1,131 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+#
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
#7297 excluding those go terms with zero markers

# ---- Remaining GO inputs ------------------------------------------------------
# Original annotation for each object below: 7297 entries, i.e. the GO terms
# that have zero markers are excluded.
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Restrict to the GO terms handled by this script ---------------------------
# Keep only GO terms with at least one marker, then take this script's chunk
# (positions 6001..7297 of the filtered list).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[6001:7297]
rGF_filtered <- rGF_filtered[6001:7297]

# ---- Cross-validation setup ----------------------------------------------------
cycles <- 10
n_folds <- 8

# Validation folds are read from the earlier GBLUP run instead of being drawn
# here, so no random fold generation (and no seeding) is needed in this script.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")

# Fail before any model is fitted if the stored folds do not cover every cycle;
# otherwise the run would only stop at the first missing cycle.
stopifnot(length(gblup_validate) >= cycles)

# One slot per cycle; a slot stays an empty list when the fit for it fails.
T19_8_gblup_variances_all <- rep(list(list()), cycles)
T19_8_gblup_prediction_all <- rep(list(list()), cycles)
T19_8_gfblup_variances_all <- rep(list(list()), cycles)
T19_8_gfblup_prediction_all <- rep(list(list()), cycles)
T19_8_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#",r,"..."))

  # Rebuild the fold matrix for this cycle from the stored values.
  # NOTE(review): if the stored object is a column-filled matrix (as the
  # `list(validate)` stored below would be), byrow = TRUE does not reproduce
  # the original column layout -- confirm this is the intended fold assignment.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )

  message ("T19_8...")
  y <- pheno_df_pla$T19_8
  # NOTE(review): the formula has y on both sides; presumably an intercept-only
  # design (y ~ 1) is intended -- confirm before changing.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  n <- length(y)

  # ---- GBLUP: single genomic relationship matrix G ----
  # The fits are returned from tryCatch() and stored afterwards. Assignments
  # made inside an error handler only affect the handler's own environment.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    T19_8_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    T19_8_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one two-component model per GO term ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)

  for (k in seq_along(GF_filtered)) {
    message(paste("GO#",k,"..."))
    # Pair the k-th entry of GF_filtered with the k-th entry of rGF_filtered.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])

    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO#", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    # Store both fits or neither: if either greml() call fails, var[[k]] and
    # pred[[k]] both keep their preallocated empty list.
    if (!is.null(gf_fit)) {
      var[[k]] <- gf_fit[["var"]]
      pred[[k]] <- gf_fit[["pred"]]
    }
  }

  T19_8_gfblup_variances_all[[r]] <- var
  T19_8_gfblup_prediction_all[[r]] <- pred
  T19_8_gfblup_validate_all[[r]] <- list(validate)
}

saveRDS(T19_8_gblup_variances_all,"T19_8_gblup_variances_all_6001_7297.rds")
saveRDS(T19_8_gblup_prediction_all,"T19_8_gblup_prediction_all_6001_7297.rds")
saveRDS(T19_8_gfblup_variances_all,"T19_8_gfblup_variances_all_6001_7297.rds")
saveRDS(T19_8_gfblup_prediction_all,"T19_8_gfblup_prediction_all_6001_7297.rds")
+saveRDS(T19_8_gfblup_validate_all,"T19_8_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T20_1_1001_2000.R b/code/using_GO/pla/T20_1_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..e4f24a8d44f33e51f74abcac6a3feed5f7c331c7 --- /dev/null +++ b/code/using_GO/pla/T20_1_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# ---- Genotype / phenotype inputs (continued) ----------------------------------
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores whatever objects were saved in the .Rdata file under their
# original names.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO-term inputs -----------------------------------------------------------
# Original annotation for the next three objects: 7432.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# Original annotation for the remaining objects: 7297, excluding the GO terms
# with zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
+######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T20_1_gblup_variances_all=rep(list(list()),cycles) +T20_1_gblup_prediction_all=rep(list(list()),cycles) +T20_1_gfblup_variances_all=rep(list(list()),cycles) +T20_1_gfblup_prediction_all=rep(list(list()),cycles) +T20_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T20_1...") + y=pheno_df_pla$T20_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T20_1_gblup_variances_all[[r]]<-var + T20_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
T20_1_gblup_variances_all[[r]]<-list() + T20_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T20_1_gfblup_variances_all[[r]]<-var + T20_1_gfblup_prediction_all[[r]]<-pred + T20_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T20_1_gblup_variances_all,"T20_1_gblup_variances_all_1001_2000.rds") +saveRDS(T20_1_gblup_prediction_all,"T20_1_gblup_prediction_all_1001_2000.rds") +saveRDS(T20_1_gfblup_variances_all,"T20_1_gfblup_variances_all_1001_2000.rds") +saveRDS(T20_1_gfblup_prediction_all,"T20_1_gfblup_prediction_all_1001_2000.rds") +saveRDS(T20_1_gfblup_validate_all,"T20_1_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T20_1_1_1000.R b/code/using_GO/pla/T20_1_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..7b0d3d309b085fc67b059d7f1a249271bd037898 --- /dev/null +++ b/code/using_GO/pla/T20_1_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# 
Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
# ---- GO-term inputs (continued) -----------------------------------------------
# Original annotation for the next two objects: 7432.
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# Original annotation for the remaining objects: 7297, excluding the GO terms
# with zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter --------------------------------------------------------------------
# Keep the GO terms whose marker count is positive, look their entries up by
# name in GF / rGF, and take positions 1..1000 of the result for this script.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][1:1000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][1:1000]

# ---- Cross-validation setup ----------------------------------------------------
cycles <- 10
# Stored validation folds from the GBLUP run, one entry read per cycle later on.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Result container: one empty list per cycle.
T20_1_gblup_variances_all <- replicate(cycles, list(), simplify = FALSE)
+T20_1_gblup_prediction_all=rep(list(list()),cycles) +T20_1_gfblup_variances_all=rep(list(list()),cycles) +T20_1_gfblup_prediction_all=rep(list(list()),cycles) +T20_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T20_1...") + y=pheno_df_pla$T20_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T20_1_gblup_variances_all[[r]]<-var + T20_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T20_1_gblup_variances_all[[r]]<-list() + T20_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends 
+ } + T20_1_gfblup_variances_all[[r]]<-var + T20_1_gfblup_prediction_all[[r]]<-pred + T20_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T20_1_gblup_variances_all,"T20_1_gblup_variances_all_1_1000.rds") +saveRDS(T20_1_gblup_prediction_all,"T20_1_gblup_prediction_all_1_1000.rds") +saveRDS(T20_1_gfblup_variances_all,"T20_1_gfblup_variances_all_1_1000.rds") +saveRDS(T20_1_gfblup_prediction_all,"T20_1_gfblup_prediction_all_1_1000.rds") +saveRDS(T20_1_gfblup_validate_all,"T20_1_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T20_1_2001_3000.R b/code/using_GO/pla/T20_1_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..dac8bf875a3904137245ddd3fd7ac8f6b8c3c3b1 --- /dev/null +++ b/code/using_GO/pla/T20_1_2001_3000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') 
# ==============================================================================
# Data loading
# ==============================================================================
message("Loading data...")

# ---- Genotype / phenotype inputs ----------------------------------------------
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# load() restores whatever objects were saved in the .Rdata file under their
# original names.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO-term inputs -----------------------------------------------------------
# Original annotation for the next three objects: 7432.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# Original annotation for the remaining objects: 7297, excluding the GO terms
# with zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T20_1_gblup_variances_all=rep(list(list()),cycles) +T20_1_gblup_prediction_all=rep(list(list()),cycles) +T20_1_gfblup_variances_all=rep(list(list()),cycles) +T20_1_gfblup_prediction_all=rep(list(list()),cycles) +T20_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T20_1...") + y=pheno_df_pla$T20_1 + fm <- y ~ 1*y + 
X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T20_1_gblup_variances_all[[r]]<-var + T20_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T20_1_gblup_variances_all[[r]]<-list() + T20_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T20_1_gfblup_variances_all[[r]]<-var + T20_1_gfblup_prediction_all[[r]]<-pred + T20_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T20_1_gblup_variances_all,"T20_1_gblup_variances_all_2001_3000.rds") +saveRDS(T20_1_gblup_prediction_all,"T20_1_gblup_prediction_all_2001_3000.rds") +saveRDS(T20_1_gfblup_variances_all,"T20_1_gfblup_variances_all_2001_3000.rds") +saveRDS(T20_1_gfblup_prediction_all,"T20_1_gfblup_prediction_all_2001_3000.rds") +saveRDS(T20_1_gfblup_validate_all,"T20_1_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T20_1_3001_4000.R 
b/code/using_GO/pla/T20_1_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..789b0408307a6a86357880cc5e29f5d47e49ab17 --- /dev/null +++ b/code/using_GO/pla/T20_1_3001_4000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") 
# ---- Phenotype inputs (continued) ---------------------------------------------
# load() restores whatever objects were saved in each .Rdata file under their
# original names.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# ---- GO-term inputs -----------------------------------------------------------
# Original annotation for the next three objects: 7432.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# Original annotation for the remaining objects: 7297, excluding the GO terms
# with zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter --------------------------------------------------------------------
# Keep the GO terms whose marker count is positive, look their entries up by
# name in GF / rGF, and take positions 3001..4000 of the result for this script.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][3001:4000]
rGF_filtered <- rGF[names(go_all_markers_filtered)][3001:4000]

+########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T20_1_gblup_variances_all=rep(list(list()),cycles) +T20_1_gblup_prediction_all=rep(list(list()),cycles) +T20_1_gfblup_variances_all=rep(list(list()),cycles) +T20_1_gfblup_prediction_all=rep(list(list()),cycles) +T20_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T20_1...") + y=pheno_df_pla$T20_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T20_1_gblup_variances_all[[r]]<-var + T20_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T20_1_gblup_variances_all[[r]]<-list() + T20_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { 
+ message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T20_1_gfblup_variances_all[[r]]<-var + T20_1_gfblup_prediction_all[[r]]<-pred + T20_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T20_1_gblup_variances_all,"T20_1_gblup_variances_all_3001_4000.rds") +saveRDS(T20_1_gblup_prediction_all,"T20_1_gblup_prediction_all_3001_4000.rds") +saveRDS(T20_1_gfblup_variances_all,"T20_1_gfblup_variances_all_3001_4000.rds") +saveRDS(T20_1_gfblup_prediction_all,"T20_1_gfblup_prediction_all_3001_4000.rds") +saveRDS(T20_1_gfblup_validate_all,"T20_1_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T20_1_4001_5000.R b/code/using_GO/pla/T20_1_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..4a8b3569f6578ca7707be75bbec4f8dbe9d74b58 --- /dev/null +++ b/code/using_GO/pla/T20_1_4001_5000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if 
(!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers 
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T20_1_gblup_variances_all=rep(list(list()),cycles) +T20_1_gblup_prediction_all=rep(list(list()),cycles) +T20_1_gfblup_variances_all=rep(list(list()),cycles) +T20_1_gfblup_prediction_all=rep(list(list()),cycles) +T20_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = 
nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T20_1...") + y=pheno_df_pla$T20_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T20_1_gblup_variances_all[[r]]<-var + T20_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T20_1_gblup_variances_all[[r]]<-list() + T20_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T20_1_gfblup_variances_all[[r]]<-var + T20_1_gfblup_prediction_all[[r]]<-pred + T20_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T20_1_gblup_variances_all,"T20_1_gblup_variances_all_4001_5000.rds") +saveRDS(T20_1_gblup_prediction_all,"T20_1_gblup_prediction_all_4001_5000.rds") 
+saveRDS(T20_1_gfblup_variances_all,"T20_1_gfblup_variances_all_4001_5000.rds") +saveRDS(T20_1_gfblup_prediction_all,"T20_1_gfblup_prediction_all_4001_5000.rds") +saveRDS(T20_1_gfblup_validate_all,"T20_1_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T20_1_5001_6000.R b/code/using_GO/pla/T20_1_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..ed86247cba37a5d990a5814781272cb68248a369 --- /dev/null +++ b/code/using_GO/pla/T20_1_5001_6000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") 
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") 
#7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T20_1_gblup_variances_all=rep(list(list()),cycles) +T20_1_gblup_prediction_all=rep(list(list()),cycles) +T20_1_gfblup_variances_all=rep(list(list()),cycles) +T20_1_gfblup_prediction_all=rep(list(list()),cycles) +T20_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T20_1...") + y=pheno_df_pla$T20_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T20_1_gblup_variances_all[[r]]<-var + 
T20_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T20_1_gblup_variances_all[[r]]<-list() + T20_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T20_1_gfblup_variances_all[[r]]<-var + T20_1_gfblup_prediction_all[[r]]<-pred + T20_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T20_1_gblup_variances_all,"T20_1_gblup_variances_all_5001_6000.rds") +saveRDS(T20_1_gblup_prediction_all,"T20_1_gblup_prediction_all_5001_6000.rds") +saveRDS(T20_1_gfblup_variances_all,"T20_1_gfblup_variances_all_5001_6000.rds") +saveRDS(T20_1_gfblup_prediction_all,"T20_1_gfblup_prediction_all_5001_6000.rds") +saveRDS(T20_1_gfblup_validate_all,"T20_1_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T20_1_6001_7297.R b/code/using_GO/pla/T20_1_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..ce3fb321b113a6385028404986490736c25e0007 --- /dev/null +++ b/code/using_GO/pla/T20_1_6001_7297.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- 
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") 
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[6001:7297] +rGF_filtered<-rGF_filtered[6001:7297] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + 
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T20_1_gblup_variances_all=rep(list(list()),cycles) +T20_1_gblup_prediction_all=rep(list(list()),cycles) +T20_1_gfblup_variances_all=rep(list(list()),cycles) +T20_1_gfblup_prediction_all=rep(list(list()),cycles) +T20_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T20_1...") + y=pheno_df_pla$T20_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T20_1_gblup_variances_all[[r]]<-var + T20_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T20_1_gblup_variances_all[[r]]<-list() + T20_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T20_1_gfblup_variances_all[[r]]<-var + T20_1_gfblup_prediction_all[[r]]<-pred + T20_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T20_1_gblup_variances_all,"T20_1_gblup_variances_all_6001_7297.rds") +saveRDS(T20_1_gblup_prediction_all,"T20_1_gblup_prediction_all_6001_7297.rds") +saveRDS(T20_1_gfblup_variances_all,"T20_1_gfblup_variances_all_6001_7297.rds") +saveRDS(T20_1_gfblup_prediction_all,"T20_1_gfblup_prediction_all_6001_7297.rds") +saveRDS(T20_1_gfblup_validate_all,"T20_1_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T20_2_1001_2000.R b/code/using_GO/pla/T20_2_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..7242215366070f1b2fdac7973b1907dd87ca066d --- /dev/null +++ b/code/using_GO/pla/T20_2_1001_2000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# 
devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") 
#7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] + +########################################################################################### +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T20_2_gblup_variances_all=rep(list(list()),cycles) +T20_2_gblup_prediction_all=rep(list(list()),cycles) +T20_2_gfblup_variances_all=rep(list(list()),cycles) +T20_2_gfblup_prediction_all=rep(list(list()),cycles) +T20_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + 
#validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T20_2...") + y=pheno_df_pla$T20_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T20_2_gblup_variances_all[[r]]<-var + T20_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T20_2_gblup_variances_all[[r]]<-list() + T20_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T20_2_gfblup_variances_all[[r]]<-var + T20_2_gfblup_prediction_all[[r]]<-pred + T20_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T20_2_gblup_variances_all,"T20_2_gblup_variances_all_1001_2000.rds") +saveRDS(T20_2_gblup_prediction_all,"T20_2_gblup_prediction_all_1001_2000.rds") +saveRDS(T20_2_gfblup_variances_all,"T20_2_gfblup_variances_all_1001_2000.rds") +saveRDS(T20_2_gfblup_prediction_all,"T20_2_gfblup_prediction_all_1001_2000.rds") 
+saveRDS(T20_2_gfblup_validate_all,"T20_2_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/T20_2_1_1000.R b/code/using_GO/pla/T20_2_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..ada4b08a2489a7ffd3c734b3fd643333580b5ee4 --- /dev/null +++ b/code/using_GO/pla/T20_2_1_1000.R @@ -0,0 +1,131 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") 
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +######################################################################################## +#filter +n=length(go_all_genes_number[go_all_markers_number>0]) 
# Keep only entries with at least one marker, then take this job's slice (1:1000).
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]

###########################################################################################
cycles <- 10
# Validation sets stored under the GBLUP results directory; entry r is used in
# cycle r (presumably so GBLUP and GFBLUP share the same folds -- TODO confirm).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Per-cycle result containers, pre-filled with empty lists.
T20_2_gblup_variances_all <- rep(list(list()), cycles)
T20_2_gblup_prediction_all <- rep(list(list()), cycles)
T20_2_gfblup_variances_all <- rep(list(list()), cycles)
T20_2_gfblup_prediction_all <- rep(list(list()), cycles)
T20_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and design matrix are identical in every cycle, so build them once.
y <- pheno_df_pla$T20_2
# NOTE(review): `1*y` also places the response on the right-hand side; this was
# presumably meant to be the intercept-only model `y ~ 1`. Kept as written -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are taken from the saved validation sets, not from a fresh random draw.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T20_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # tryCatch() returns the fits (or empty placeholders on error) and the
  # assignment happens out here: an assignment made inside an error handler is
  # local to the handler function and never reaches the result lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T20_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T20_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # k-th element of GF_filtered plus k-th element of rGF_filtered (presumably
    # the GO-term GRM and the GRM of the remaining markers -- TODO confirm).
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    # Either both results are stored or both stay empty for this GO term.
    gf_var[[k]] <- fit_k[["var"]]
    gf_pred[[k]] <- fit_k[["pred"]]
  }
  T20_2_gfblup_variances_all[[r]] <- gf_var
  T20_2_gfblup_prediction_all[[r]] <- gf_pred
  T20_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T20_2_gblup_variances_all, "T20_2_gblup_variances_all_1_1000.rds")
saveRDS(T20_2_gblup_prediction_all, "T20_2_gblup_prediction_all_1_1000.rds")
saveRDS(T20_2_gfblup_variances_all, "T20_2_gfblup_variances_all_1_1000.rds")
saveRDS(T20_2_gfblup_prediction_all, "T20_2_gfblup_prediction_all_1_1000.rds")
saveRDS(T20_2_gfblup_validate_all, "T20_2_gfblup_validate_all_1_1000.rds")

##################################################################################################################################
# Patch metadata for the next script, kept verbatim:
# diff --git a/code/using_GO/pla/T20_2_2001_3000.R b/code/using_GO/pla/T20_2_2001_3000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..5dbc48d887f2a520c231043bea2400c0c3d6fdea
# --- /dev/null
# +++ b/code/using_GO/pla/T20_2_2001_3000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Relative paths used later in the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
# Input directories, defined once so the calls below do not repeat the absolute path.
priors_data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
priors_go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
MAC_matrix_with_header <- readRDS(file = file.path(priors_data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file = file.path(priors_data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file = file.path(priors_data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file = file.path(priors_data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file = file.path(priors_data_dir, "W.rds"))
load(file = file.path(priors_data_dir, "G.Rdata"))
MAC_df <- readRDS(file = file.path(priors_data_dir, "MAC_df.rds"))
load(file = file.path(priors_data_dir, "Pheno_pla.Rdata"))
load(file = file.path(priors_data_dir, "geno_pheno_pla.Rdata"))
load(file = file.path(priors_data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file = file.path(priors_go_dir, "go_all_genes_number.rds")) # 7432
go_all_markers_number <- readRDS(file = file.path(priors_go_dir, "go_all_markers_number.rds")) # 7432
# GO input directory, defined once so the calls below do not repeat the absolute path.
priors_go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
go_all_markers <- readRDS(file = file.path(priors_go_dir, "go_all_markers.rds")) # 7432
# The objects below: 7297, excluding those GO terms with zero markers.
markerSets <- readRDS(file = file.path(priors_go_dir, "markerSets.rds"))
setsGF <- readRDS(file = file.path(priors_go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file = file.path(priors_go_dir, "rsetsGF.rds"))
nsets <- readRDS(file = file.path(priors_go_dir, "nsets.rds"))
nsnps <- readRDS(file = file.path(priors_go_dir, "nsnps.rds"))
GF <- readRDS(file = file.path(priors_go_dir, "GF.rds"))
rGF <- readRDS(file = file.path(priors_go_dir, "rGF.rds"))
########################################################################################
# filter
# Keep only entries with at least one marker, then take this job's slice (2001:3000).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[2001:3000]
rGF_filtered <- rGF_filtered[2001:3000]

###########################################################################################
cycles <- 10
# Validation sets stored under the GBLUP results directory; entry r is used in
# cycle r (presumably so GBLUP and GFBLUP share the same folds -- TODO confirm).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Per-cycle result containers, pre-filled with empty lists.
T20_2_gblup_variances_all <- rep(list(list()), cycles)
T20_2_gblup_prediction_all <- rep(list(list()), cycles)
T20_2_gfblup_variances_all <- rep(list(list()), cycles)
T20_2_gfblup_prediction_all <- rep(list(list()), cycles)
T20_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and design matrix are identical in every cycle, so build them once.
y <- pheno_df_pla$T20_2
# NOTE(review): `1*y` also places the response on the right-hand side; this was
# presumably meant to be the intercept-only model `y ~ 1`. Kept as written -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are taken from the saved validation sets, not from a fresh random draw.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T20_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # tryCatch() returns the fits (or empty placeholders on error) and the
  # assignment happens out here: an assignment made inside an error handler is
  # local to the handler function and never reaches the result lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T20_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T20_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # k-th element of GF_filtered plus k-th element of rGF_filtered (presumably
    # the GO-term GRM and the GRM of the remaining markers -- TODO confirm).
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    # Either both results are stored or both stay empty for this GO term.
    gf_var[[k]] <- fit_k[["var"]]
    gf_pred[[k]] <- fit_k[["pred"]]
  }
  T20_2_gfblup_variances_all[[r]] <- gf_var
  T20_2_gfblup_prediction_all[[r]] <- gf_pred
  T20_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T20_2_gblup_variances_all, "T20_2_gblup_variances_all_2001_3000.rds")
saveRDS(T20_2_gblup_prediction_all, "T20_2_gblup_prediction_all_2001_3000.rds")
saveRDS(T20_2_gfblup_variances_all, "T20_2_gfblup_variances_all_2001_3000.rds")
saveRDS(T20_2_gfblup_prediction_all, "T20_2_gfblup_prediction_all_2001_3000.rds")
saveRDS(T20_2_gfblup_validate_all, "T20_2_gfblup_validate_all_2001_3000.rds")

##################################################################################################################################
# Patch metadata for the next script, kept verbatim:
# diff --git a/code/using_GO/pla/T20_2_3001_4000.R b/code/using_GO/pla/T20_2_3001_4000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..0824607f931870331413cafff151ebb72ce1f7ad
# --- /dev/null
# +++ b/code/using_GO/pla/T20_2_3001_4000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Relative paths used later in the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
# Input directories, defined once so the calls below do not repeat the absolute path.
priors_data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
priors_go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
MAC_matrix_with_header <- readRDS(file = file.path(priors_data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file = file.path(priors_data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file = file.path(priors_data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file = file.path(priors_data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file = file.path(priors_data_dir, "W.rds"))
# load() restores objects under the names they were saved with, so the objects
# created by the .Rdata files are not visible from this script.
load(file = file.path(priors_data_dir, "G.Rdata"))
MAC_df <- readRDS(file = file.path(priors_data_dir, "MAC_df.rds"))
load(file = file.path(priors_data_dir, "Pheno_pla.Rdata"))
load(file = file.path(priors_data_dir, "geno_pheno_pla.Rdata"))
load(file = file.path(priors_data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file = file.path(priors_go_dir, "go_all_genes_number.rds")) # 7432
go_all_markers_number <- readRDS(file = file.path(priors_go_dir, "go_all_markers_number.rds")) # 7432
go_all_markers <- readRDS(file = file.path(priors_go_dir, "go_all_markers.rds")) # 7432
# The objects below: 7297, excluding those GO terms with zero markers.
markerSets <- readRDS(file = file.path(priors_go_dir, "markerSets.rds"))
setsGF <- readRDS(file = file.path(priors_go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file = file.path(priors_go_dir, "rsetsGF.rds"))
nsets <- readRDS(file = file.path(priors_go_dir, "nsets.rds"))
nsnps <- readRDS(file = file.path(priors_go_dir, "nsnps.rds"))
GF <- readRDS(file = file.path(priors_go_dir, "GF.rds"))
rGF <- readRDS(file = file.path(priors_go_dir, "rGF.rds"))
########################################################################################
# filter
# Keep only entries with at least one marker, then take this job's slice (3001:4000).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]

###########################################################################################
cycles <- 10
# Validation sets stored under the GBLUP results directory; entry r is used in
# cycle r (presumably so GBLUP and GFBLUP share the same folds -- TODO confirm).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Per-cycle result containers, pre-filled with empty lists.
T20_2_gblup_variances_all <- rep(list(list()), cycles)
T20_2_gblup_prediction_all <- rep(list(list()), cycles)
T20_2_gfblup_variances_all <- rep(list(list()), cycles)
T20_2_gfblup_prediction_all <- rep(list(list()), cycles)
T20_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and design matrix are identical in every cycle, so build them once.
y <- pheno_df_pla$T20_2
# NOTE(review): `1*y` also places the response on the right-hand side; this was
# presumably meant to be the intercept-only model `y ~ 1`. Kept as written -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are taken from the saved validation sets, not from a fresh random draw.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T20_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # tryCatch() returns the fits (or empty placeholders on error) and the
  # assignment happens out here: an assignment made inside an error handler is
  # local to the handler function and never reaches the result lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T20_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T20_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # k-th element of GF_filtered plus k-th element of rGF_filtered (presumably
    # the GO-term GRM and the GRM of the remaining markers -- TODO confirm).
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    # Either both results are stored or both stay empty for this GO term.
    gf_var[[k]] <- fit_k[["var"]]
    gf_pred[[k]] <- fit_k[["pred"]]
  }
  T20_2_gfblup_variances_all[[r]] <- gf_var
  T20_2_gfblup_prediction_all[[r]] <- gf_pred
  T20_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T20_2_gblup_variances_all, "T20_2_gblup_variances_all_3001_4000.rds")
saveRDS(T20_2_gblup_prediction_all, "T20_2_gblup_prediction_all_3001_4000.rds")
saveRDS(T20_2_gfblup_variances_all, "T20_2_gfblup_variances_all_3001_4000.rds")
saveRDS(T20_2_gfblup_prediction_all, "T20_2_gfblup_prediction_all_3001_4000.rds")
saveRDS(T20_2_gfblup_validate_all, "T20_2_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
# Patch metadata for the next script, kept verbatim:
# diff --git a/code/using_GO/pla/T20_2_4001_5000.R b/code/using_GO/pla/T20_2_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..fe263e253bb31f11edfe1e23cc8a755406ae67fd
# --- /dev/null
# +++ b/code/using_GO/pla/T20_2_4001_5000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Relative paths used later in the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = file.path(priors_data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file = file.path(priors_data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file = file.path(priors_data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file = file.path(priors_data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file = file.path(priors_data_dir, "W.rds"))
load(file = file.path(priors_data_dir, "G.Rdata"))
MAC_df <- readRDS(file = file.path(priors_data_dir, "MAC_df.rds"))
# Input directories, defined once so the calls below do not repeat the absolute path.
priors_data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
priors_go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
# load() restores objects under the names they were saved with, so the objects
# created by the .Rdata files are not visible from this script.
load(file = file.path(priors_data_dir, "Pheno_pla.Rdata"))
load(file = file.path(priors_data_dir, "geno_pheno_pla.Rdata"))
load(file = file.path(priors_data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file = file.path(priors_go_dir, "go_all_genes_number.rds")) # 7432
go_all_markers_number <- readRDS(file = file.path(priors_go_dir, "go_all_markers_number.rds")) # 7432
go_all_markers <- readRDS(file = file.path(priors_go_dir, "go_all_markers.rds")) # 7432
# The objects below: 7297, excluding those GO terms with zero markers.
markerSets <- readRDS(file = file.path(priors_go_dir, "markerSets.rds"))
setsGF <- readRDS(file = file.path(priors_go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file = file.path(priors_go_dir, "rsetsGF.rds"))
nsets <- readRDS(file = file.path(priors_go_dir, "nsets.rds"))
nsnps <- readRDS(file = file.path(priors_go_dir, "nsnps.rds"))
GF <- readRDS(file = file.path(priors_go_dir, "GF.rds"))
rGF <- readRDS(file = file.path(priors_go_dir, "rGF.rds"))
########################################################################################
# filter
# Keep only entries with at least one marker, then take this job's slice (4001:5000).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]

###########################################################################################
cycles <- 10
# Validation sets stored under the GBLUP results directory; entry r is used in
# cycle r (presumably so GBLUP and GFBLUP share the same folds -- TODO confirm).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Per-cycle result containers, pre-filled with empty lists.
T20_2_gblup_variances_all <- rep(list(list()), cycles)
T20_2_gblup_prediction_all <- rep(list(list()), cycles)
T20_2_gfblup_variances_all <- rep(list(list()), cycles)
T20_2_gfblup_prediction_all <- rep(list(list()), cycles)
T20_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and design matrix are identical in every cycle, so build them once.
y <- pheno_df_pla$T20_2
# NOTE(review): `1*y` also places the response on the right-hand side; this was
# presumably meant to be the intercept-only model `y ~ 1`. Kept as written -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are taken from the saved validation sets, not from a fresh random draw.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T20_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # tryCatch() returns the fits (or empty placeholders on error) and the
  # assignment happens out here: an assignment made inside an error handler is
  # local to the handler function and never reaches the result lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T20_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T20_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # k-th element of GF_filtered plus k-th element of rGF_filtered (presumably
    # the GO-term GRM and the GRM of the remaining markers -- TODO confirm).
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    # Either both results are stored or both stay empty for this GO term.
    gf_var[[k]] <- fit_k[["var"]]
    gf_pred[[k]] <- fit_k[["pred"]]
  }
  T20_2_gfblup_variances_all[[r]] <- gf_var
  T20_2_gfblup_prediction_all[[r]] <- gf_pred
  T20_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T20_2_gblup_variances_all, "T20_2_gblup_variances_all_4001_5000.rds")
saveRDS(T20_2_gblup_prediction_all, "T20_2_gblup_prediction_all_4001_5000.rds")
saveRDS(T20_2_gfblup_variances_all, "T20_2_gfblup_variances_all_4001_5000.rds")
saveRDS(T20_2_gfblup_prediction_all, "T20_2_gfblup_prediction_all_4001_5000.rds")
saveRDS(T20_2_gfblup_validate_all, "T20_2_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################
# Patch metadata for the next script, kept verbatim:
# diff --git a/code/using_GO/pla/T20_2_5001_6000.R b/code/using_GO/pla/T20_2_5001_6000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..d10b15090c0dacac64861e9b0197f3eebefac203
# --- /dev/null
# +++ b/code/using_GO/pla/T20_2_5001_6000.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# Relative paths used later in the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
# Input directories, defined once so the calls below do not repeat the absolute path.
priors_data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
priors_go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
MAC_matrix_with_header <- readRDS(file = file.path(priors_data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file = file.path(priors_data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file = file.path(priors_data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file = file.path(priors_data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file = file.path(priors_data_dir, "W.rds"))
load(file = file.path(priors_data_dir, "G.Rdata"))
MAC_df <- readRDS(file = file.path(priors_data_dir, "MAC_df.rds"))
load(file = file.path(priors_data_dir, "Pheno_pla.Rdata"))
load(file = file.path(priors_data_dir, "geno_pheno_pla.Rdata"))
load(file = file.path(priors_data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file = file.path(priors_go_dir, "go_all_genes_number.rds")) # 7432
go_all_markers_number <- readRDS(file = file.path(priors_go_dir, "go_all_markers_number.rds")) # 7432
go_all_markers <- readRDS(file = file.path(priors_go_dir, "go_all_markers.rds")) # 7432
markerSets <- readRDS(file = file.path(priors_go_dir, "markerSets.rds")) # 7297 excluding those go terms with zero markers
# GO input directory, defined once so the calls below do not repeat the absolute path.
priors_go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
# The objects below: 7297, excluding those GO terms with zero markers.
setsGF <- readRDS(file = file.path(priors_go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file = file.path(priors_go_dir, "rsetsGF.rds"))
nsets <- readRDS(file = file.path(priors_go_dir, "nsets.rds"))
nsnps <- readRDS(file = file.path(priors_go_dir, "nsnps.rds"))
GF <- readRDS(file = file.path(priors_go_dir, "GF.rds"))
rGF <- readRDS(file = file.path(priors_go_dir, "rGF.rds"))
########################################################################################
# filter
# Keep only entries with at least one marker, then take this job's slice (5001:6000).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]

###########################################################################################
cycles <- 10
# Validation sets stored under the GBLUP results directory; entry r is used in
# cycle r (presumably so GBLUP and GFBLUP share the same folds -- TODO confirm).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Per-cycle result containers, pre-filled with empty lists.
T20_2_gblup_variances_all <- rep(list(list()), cycles)
T20_2_gblup_prediction_all <- rep(list(list()), cycles)
T20_2_gfblup_variances_all <- rep(list(list()), cycles)
T20_2_gfblup_prediction_all <- rep(list(list()), cycles)
T20_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and design matrix are identical in every cycle, so build them once.
y <- pheno_df_pla$T20_2
# NOTE(review): `1*y` also places the response on the right-hand side; this was
# presumably meant to be the intercept-only model `y ~ 1`. Kept as written -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are taken from the saved validation sets, not from a fresh random draw.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T20_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # tryCatch() returns the fits (or empty placeholders on error) and the
  # assignment happens out here: an assignment made inside an error handler is
  # local to the handler function and never reaches the result lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T20_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T20_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # k-th element of GF_filtered plus k-th element of rGF_filtered (presumably
    # the GO-term GRM and the GRM of the remaining markers -- TODO confirm).
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    # Either both results are stored or both stay empty for this GO term.
    gf_var[[k]] <- fit_k[["var"]]
    gf_pred[[k]] <- fit_k[["pred"]]
  }
  T20_2_gfblup_variances_all[[r]] <- gf_var
  T20_2_gfblup_prediction_all[[r]] <- gf_pred
  T20_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T20_2_gblup_variances_all, "T20_2_gblup_variances_all_5001_6000.rds")
saveRDS(T20_2_gblup_prediction_all, "T20_2_gblup_prediction_all_5001_6000.rds")
# Write the remaining GFBLUP results for the 5001:6000 slice.
saveRDS(object = T20_2_gfblup_variances_all, file = "T20_2_gfblup_variances_all_5001_6000.rds")
saveRDS(object = T20_2_gfblup_prediction_all, file = "T20_2_gfblup_prediction_all_5001_6000.rds")
saveRDS(object = T20_2_gfblup_validate_all, file = "T20_2_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################
# Patch metadata for the next script, kept verbatim:
# diff --git a/code/using_GO/pla/T20_2_6001_7297.R b/code/using_GO/pla/T20_2_6001_7297.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..0b0f4751d0f8e8ffedd3804ba601068480f5f264
# --- /dev/null
# +++ b/code/using_GO/pla/T20_2_6001_7297.R
# @@ -0,0 +1,131 @@
############################# FUNCTIONS ##############################################
# Remove whitespace at the start and end of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Logical complement of `%in%`.
`%not_in%` <- purrr::negate(`%in%`)
# TRUE when mypkg appears in the first column of installed.packages().
is.installed <- function(mypkg) {
  installed_names <- installed.packages()[, 1]
  is.element(mypkg, installed_names)
}
########################################################################################

# Relative paths used later in the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds"
)
accessions <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds"
)
# Input directories, defined once so the calls below do not repeat the absolute path.
priors_data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
priors_go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
all_nucl_genes_bed <- readRDS(file = file.path(priors_data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file = file.path(priors_data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file = file.path(priors_data_dir, "W.rds"))
# load() restores objects under the names they were saved with, so the objects
# created by the .Rdata files are not visible from this script.
load(file = file.path(priors_data_dir, "G.Rdata"))
MAC_df <- readRDS(file = file.path(priors_data_dir, "MAC_df.rds"))
load(file = file.path(priors_data_dir, "Pheno_pla.Rdata"))
load(file = file.path(priors_data_dir, "geno_pheno_pla.Rdata"))
load(file = file.path(priors_data_dir, "pheno_df_pla.Rdata"))
go_all_genes_number <- readRDS(file = file.path(priors_go_dir, "go_all_genes_number.rds")) # 7432
go_all_markers_number <- readRDS(file = file.path(priors_go_dir, "go_all_markers_number.rds")) # 7432
go_all_markers <- readRDS(file = file.path(priors_go_dir, "go_all_markers.rds")) # 7432
# The objects below: 7297, excluding those GO terms with zero markers.
markerSets <- readRDS(file = file.path(priors_go_dir, "markerSets.rds"))
setsGF <- readRDS(file = file.path(priors_go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file = file.path(priors_go_dir, "rsetsGF.rds"))
nsets <- readRDS(file = file.path(priors_go_dir, "nsets.rds"))
nsnps <- readRDS(file = file.path(priors_go_dir, "nsnps.rds"))
GF <- readRDS(file = file.path(priors_go_dir, "GF.rds"))
rGF <- readRDS(file = file.path(priors_go_dir, "rGF.rds"))
########################################################################################
# filter
# Keep only entries with at least one marker, then take this job's slice (6001:7297).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
GF_filtered <- GF_filtered[6001:7297]
rGF_filtered <- rGF_filtered[6001:7297]

###########################################################################################
cycles <- 10
# Validation sets stored under the GBLUP results directory; entry r is used in
# cycle r (presumably so GBLUP and GFBLUP share the same folds -- TODO confirm).
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds")
n_folds <- 8

# Per-cycle result containers, pre-filled with empty lists.
T20_2_gblup_variances_all <- rep(list(list()), cycles)
T20_2_gblup_prediction_all <- rep(list(list()), cycles)
T20_2_gfblup_variances_all <- rep(list(list()), cycles)
T20_2_gfblup_prediction_all <- rep(list(list()), cycles)
T20_2_gfblup_validate_all <- rep(list(list()), cycles)

# Phenotype and design matrix are identical in every cycle, so build them once.
y <- pheno_df_pla$T20_2
# NOTE(review): `1*y` also places the response on the right-hand side; this was
# presumably meant to be the intercept-only model `y ~ 1`. Kept as written -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Folds are taken from the saved validation sets, not from a fresh random draw.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df_pla) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("T20_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # tryCatch() returns the fits (or empty placeholders on error) and the
  # assignment happens out here: an assignment made inside an error handler is
  # local to the handler function and never reaches the result lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  T20_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  T20_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # k-th element of GF_filtered plus k-th element of rGF_filtered (presumably
    # the GO-term GRM and the GRM of the remaining markers -- TODO confirm).
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    # Either both results are stored or both stay empty for this GO term.
    gf_var[[k]] <- fit_k[["var"]]
    gf_pred[[k]] <- fit_k[["pred"]]
  }
  T20_2_gfblup_variances_all[[r]] <- gf_var
  T20_2_gfblup_prediction_all[[r]] <- gf_pred
  T20_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(T20_2_gblup_variances_all, "T20_2_gblup_variances_all_6001_7297.rds")
saveRDS(T20_2_gblup_prediction_all, "T20_2_gblup_prediction_all_6001_7297.rds")
saveRDS(T20_2_gfblup_variances_all, "T20_2_gfblup_variances_all_6001_7297.rds")
saveRDS(T20_2_gfblup_prediction_all, "T20_2_gfblup_prediction_all_6001_7297.rds")
saveRDS(T20_2_gfblup_validate_all, "T20_2_gfblup_validate_all_6001_7297.rds")

##################################################################################################################################
# Patch metadata for the next script, kept verbatim:
# diff --git a/code/using_GO/pla/combine_cv_results.R b/code/using_GO/pla/combine_cv_results.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..3012bf6498305fe6c5cc60acce7afb349adbeb50
# --- /dev/null
# +++ b/code/using_GO/pla/combine_cv_results.R
# @@ -0,0 +1,60 @@
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold")
# Read the per-trait cross-validation tables T14_1 ... T18_8 and T19_1 ...
# T19_3 from the working directory. Each table is bound to a variable named
# after its trait id, in the same order and from the same
# "<trait>_all_go_cv_results.rds" files as the explicit assignments this loop
# replaces.
trait_ids <- c(
  paste0("T", rep(14:18, each = 8), "_", rep(1:8, times = 5)),
  paste0("T19_", 1:3)
)
for (trait_id in trait_ids) {
  assign(trait_id, readRDS(paste0(trait_id, "_all_go_cv_results.rds")))
}
# Drop the loop helpers so only the trait tables remain defined.
rm(trait_ids, trait_id)
+T19_4=readRDS("T19_4_all_go_cv_results.rds") +T19_5=readRDS("T19_5_all_go_cv_results.rds") +T19_6=readRDS("T19_6_all_go_cv_results.rds") +T19_7=readRDS("T19_7_all_go_cv_results.rds") +T19_8=readRDS("T19_8_all_go_cv_results.rds") +T20_1=readRDS("T20_1_all_go_cv_results.rds") +T20_2=readRDS("T20_2_all_go_cv_results.rds") + +pla_all_go_cv_results=data.frame(matrix(nrow=0,ncol=ncol(T14_1))) +pla_all_go_cv_results=rbind(T14_1,T14_2,T14_3,T14_4,T14_5,T14_6,T14_7,T14_8,T15_1,T15_2,T15_3,T15_4,T15_5,T15_6,T15_7,T15_8,T16_1,T16_2,T16_3,T16_4,T16_5,T16_6,T16_7,T16_8,T17_1,T17_2,T17_3,T17_4,T17_5,T17_6,T17_7,T17_8,T18_1,T18_2,T18_3,T18_4,T18_5,T18_6,T18_7,T18_8,T19_1,T19_2,T19_3,T19_4,T19_5,T19_6,T19_7,T19_8,T20_1,T20_2) +pla_all_go_cv_results[is.na(pla_all_go_cv_results)] <- 0 + + + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/") +saveRDS(pla_all_go_cv_results,"pla_all_go_cv_results.rds") diff --git a/code/using_GO/pla/gblup_vs_gfblup_using_go_analysis1.R b/code/using_GO/pla/gblup_vs_gfblup_using_go_analysis1.R new file mode 100644 index 0000000000000000000000000000000000000000..b950c2c9b784639a5280adb430e620a767cbfef9 --- /dev/null +++ b/code/using_GO/pla/gblup_vs_gfblup_using_go_analysis1.R @@ -0,0 +1,114 @@ +#This analysis uses one step GFBLUP model to find the significant GO terms having higher predictive ability than GBLUP model. 
################################################################################
#                               A N A L Y S I S
# Setup: load the T20_2 GBLUP / GFBLUP fits and the GO-term marker tables, then
# create the empty accumulators that the collection loop below fills in.
################################################################################
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/")
# library() stops right here when GO.db is not installed. require() would only
# return FALSE, and the script would fail much later, at the Term() lookup.
library(GO.db)

# Model output written by the model-fitting script (one list element per cycle).
T20_2_gblup_variances_all <- readRDS(file = "./T20_2_gblup_variances_all.rds")
T20_2_gblup_prediction_all <- readRDS(file = "./T20_2_gblup_prediction_all.rds")
T20_2_gfblup_variances_all <- readRDS(file = "./T20_2_gfblup_variances_all.rds")
T20_2_gfblup_prediction_all <- readRDS(file = "./T20_2_gfblup_prediction_all.rds")

# GO-term level tables (original note: 7432 entries each).
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# Number of CV cycles present in the results, and the GO terms that have at
# least one marker (n of them).
cycles <- length(T20_2_gfblup_prediction_all)
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]

# GBLUP accumulators: grown cycle by cycle.
gb_pred <- list()
gb_var_G <- list()
gb_var_E <- list()

# GFBLUP accumulators: one (initially empty) list per retained GO term, named
# by GO id. All four start from the same template.
empty_per_go_term <- setNames(
  rep(list(list()), length(go_all_markers_filtered)),
  names(go_all_markers_filtered)
)
gfb_pred <- empty_per_go_term
gfb_sigma_gf <- empty_per_go_term
gfb_sigma_rgf <- empty_per_go_term
gfb_sigma_e <- empty_per_go_term
+gfb_llik=rep(list(list()),length(go_all_markers_filtered)) +names(gfb_llik)=names(go_all_markers_filtered) + +for(r in c(1:cycles)) +{ + gb_llik<-T20_2_gblup_variances_all[[r]]$llik + if(length(T20_2_gblup_prediction_all[[r]]$accuracy$Corr)>0) + { + gb_pred<-c(gb_pred,as.list(T20_2_gblup_prediction_all[[r]]$accuracy$Corr)) + temp=T20_2_gblup_variances_all[[r]]$theta/sum(T20_2_gblup_variances_all[[r]]$theta) + gb_var_G<- c(gb_var_G,as.list(temp["G"])) + gb_var_E<- c(gb_var_E,as.list(temp["E"])) + } + temp1=T20_2_gfblup_prediction_all[[r]] + temp2=T20_2_gfblup_variances_all[[r]] + + for(i in c(1:n)) + { + if(length(temp1[[i]]$accuracy$Corr)>0) + { + gfb_pred[[i]]<-c(gfb_pred[[i]],as.list(temp1[[i]]$accuracy$Corr)) + } + if(length(temp2[[i]]$theta)>0) + { + temp=temp2[[i]]$theta/sum(temp2[[i]]$theta) + gfb_sigma_gf[[i]]<- temp[1] + gfb_sigma_rgf[[i]]<- temp[2] + gfb_sigma_e[[i]]<- temp[3] + gfb_llik[[i]]<-temp2[[i]]$llik + } + } +} + +gblup_var_G=mean(unname(unlist(gb_var_G))) +gblup_var_E=mean(unname(unlist(gb_var_E))) +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +#Compare predictive ability based on all cycles each having k folds +gb=as.vector(unlist(unname(gb_pred))) +T20_2_all<-data.frame(matrix(nrow=n,ncol=15)) +colnames(T20_2_all)<-c("goid","weltch_t_test_pvalue","gblup_mean","gblup_stdev","gblup_sigma_g","gblup_sigma_e","gfblup_mean","gfblup_stdev","gf","rgf","e","gblup_llik","gfblup_llik","LR","wilcox") +#rownames(T20_2_all)<-names(gfb_pred) +for(i in c(1:n)) +{ + gfb=as.vector(unlist(unname(gfb_pred[i]))) + if(length(gb)>0 && length(gfb)>0) + { + t<-t.test(gb,gfb) + w<-wilcox.test(gb,gfb) + T20_2_all[i,1]=names(gfb_pred[i]) + T20_2_all[i,2]=t$p.value #pvalue in the first column + T20_2_all[i,3]=t$estimate[1] #mean of gblup + T20_2_all[i,4]=sd(gb,na.rm=TRUE) #STDEV of gblup + T20_2_all[i,5]= gblup_var_G #Variance of G of gblup + T20_2_all[i,6]= gblup_var_E #Variance of E of gblup + 
T20_2_all[i,7]=t$estimate[2] #mean of gfblup + T20_2_all[i,8]=sd(gfb,na.rm=TRUE) #STDEV of gfblup + T20_2_all[i,9]=unlist(unname(gfb_sigma_gf[i])) #variance proportion of this go term + T20_2_all[i,10]=unlist(unname(gfb_sigma_rgf[i])) + T20_2_all[i,11]=unlist(unname(gfb_sigma_e[i])) + T20_2_all[i,12]=gb_llik + T20_2_all[i,13]=gfb_llik[[i]] + T20_2_all[i,14]=2*(gfb_llik[[i]]-gb_llik) + T20_2_all[i,15]=w$p.value + + } +} + +T20_2_all_important=T20_2_all[which(T20_2_all$gfblup_mean>T20_2_all$gblup_mean & T20_2_all$weltch_t_test_pvalue<0.05 & T20_2_all$wilcox<0.05),] + +if(nrow(T20_2_all_important)>0) +{ + go_annotation=as.data.frame(unlist(unname(as.list(Term(T20_2_all_important$goid))))) + gn=as.data.frame(go_all_genes_number[which(names(go_all_markers)%in% T20_2_all_important$goid)]) + gm=as.data.frame(go_all_markers_number[which(names(go_all_markers)%in% T20_2_all_important$goid)]) + T20_2_all_important=data.frame(T20_2_all_important,gn,gm,go_annotation) + colnames(T20_2_all_important)=c("goid","weltch_t_test_pvalue","gblup_mean","gblup_stdev","gblup_sigma_g","gblup_sigma_e","gfblup_mean","gfblup_stdev","gf","rgf","e","gblup_llik","gfblup_llik","LR","wilcox","genes_number","markers_number","ontology") +} +write.table(T20_2_all, file="T20_2_gblup_vs_gfblup_all_using_go_analysis1.tbl", sep = "\t", dec = ".",row.names = TRUE, col.names = TRUE) +saveRDS(T20_2_all,"T20_2_all_analysis1.rds") +write.table(T20_2_all_important, file="T20_2_gblup_vs_gfblup_all_using_go_important_analysis1.tbl", sep = "\t", dec = ".",row.names = TRUE, col.names = TRUE) +saveRDS(T20_2_all_important,"T20_2_all_important_analysis1.rds") + + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/code/using_GO/pla/gblup_vs_gfblup_using_go_model.R b/code/using_GO/pla/gblup_vs_gfblup_using_go_model.R new file mode 100644 index 0000000000000000000000000000000000000000..eb87b1a65b145a1db1256cfc5973f7fe46be6caf --- /dev/null +++ 
b/code/using_GO/pla/gblup_vs_gfblup_using_go_model.R @@ -0,0 +1,129 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno_pla.Rdata") 
# Remaining phenotype inputs: load() restores the objects saved in each file
# under their saved names.
# NOTE(review): `pheno_df_pla`, used by the CV loop below, presumably comes
# from the second file -- confirm.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno_pla.Rdata")
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df_pla.Rdata")

# GO-term level inputs (original note: 7432 entries each).
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# Per-GO-term objects (original note: 7297 entries, GO terms with zero markers
# excluded).
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# ---- Filter -----------------------------------------------------------------
# Keep the GO terms with at least one marker, then pick the matching entries
# of GF and rGF by GO id.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# ---- Cross-validation set-up --------------------------------------------------
# Number of repeated cross-validation cycles.
cycles <- 10
gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/pla/8fold/gblup_validate_all.rds") +n_folds <- 8 + +T20_2_gblup_variances_all=rep(list(list()),cycles) +T20_2_gblup_prediction_all=rep(list(list()),cycles) +T20_2_gfblup_variances_all=rep(list(list()),cycles) +T20_2_gfblup_prediction_all=rep(list(list()),cycles) +T20_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df_pla))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df_pla)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df_pla)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("T20_2...") + y=pheno_df_pla$T20_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + n <- length(y) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + T20_2_gblup_variances_all[[r]]<-var + T20_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + T20_2_gblup_variances_all[[r]]<-list() + T20_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = 
y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + T20_2_gfblup_variances_all[[r]]<-var + T20_2_gfblup_prediction_all[[r]]<-pred + T20_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(T20_2_gblup_variances_all,"T20_2_gblup_variances_all.rds") +saveRDS(T20_2_gblup_prediction_all,"T20_2_gblup_prediction_all.rds") +saveRDS(T20_2_gfblup_variances_all,"T20_2_gfblup_variances_all.rds") +saveRDS(T20_2_gfblup_prediction_all,"T20_2_gfblup_prediction_all.rds") +saveRDS(T20_2_gfblup_validate_all,"T20_2_gfblup_validate_all.rds") + +################################################################################################################################## diff --git a/code/using_GO/pla/pla_all_go_cv_results1.R b/code/using_GO/pla/pla_all_go_cv_results1.R new file mode 100644 index 0000000000000000000000000000000000000000..f5012085684ca74a0515a1241f605580231931c1 --- /dev/null +++ b/code/using_GO/pla/pla_all_go_cv_results1.R @@ -0,0 +1,77 @@ +#!/usr/bin/env Rscript +args = commandArgs(trailingOnly=TRUE) +################################################################################################################################## +# PLA GO +################################################################################################################################## +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/pla/8fold/") +require(GO.db) + +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 
################################################################################
# Collect, for one trait, the per-fold GBLUP and GFBLUP prediction accuracies
# and variance proportions of every cycle x GO term into one long data frame
# `res` (n_folds rows per cycle/GO-term pair). `res` and `t` are used by the
# saveRDS() call that follows this section.
################################################################################
cycles <- 10
n_folds <- 8
# GO terms with at least one marker (n of them).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]

# Zero-row result, kept only when there is nothing to collect.
res <- data.frame(matrix(nrow = 0, ncol = 10))
colnames(res) <- c("time", "gblup_acc", "gblup_G", "gblup_E", "goid", "gfblup_acc", "gfblup_Gf", "gfblup_rGf", "gfblup_E", "gfblup_h2f")

# Trait label from the first command-line argument; it is the prefix of the
# input and output file names. (The name `t` masks base::t, but is kept because
# the code after this section reads it.)
t <- args[1]
if (is.na(t) || !nzchar(t)) {
  stop("usage: Rscript pla_all_go_cv_results1.R <trait>, e.g. T14_1", call. = FALSE)
}
message(t)
gblup_prediction_all <- readRDS(file = paste0(t, "_gblup_prediction_all.rds"))
gfblup_prediction_all <- readRDS(file = paste0(t, "_gfblup_prediction_all.rds"))
time <- c(rep(t, n_folds))

# One slot per (cycle, GO term). The pieces are bound once at the end instead
# of growing `res` with rbind() on every iteration, which copied the whole
# accumulated frame each time.
pieces <- vector("list", cycles * n)

for (r in seq_len(cycles)) {
  message(paste("cycle", r))
  temp1 <- gfblup_prediction_all[[r]]

  # The GBLUP results depend only on the cycle, so they are computed once per
  # cycle rather than once per GO term. Failed fits were stored as empty lists
  # and are reported as zeros.
  if (length(gblup_prediction_all[[r]]) > 0) {
    gb_pred <- c(gblup_prediction_all[[r]]$accuracy$Corr)
    m <- as.matrix(gblup_prediction_all[[r]]$theta)
    # Each row of theta is turned into proportions of that row's total.
    gb_var_G <- apply(m, 1, function(arg) {
      arg[1] / sum(arg)
    })
    gb_var_E <- apply(m, 1, function(arg) {
      arg[2] / sum(arg)
    })
  } else {
    gb_pred <- c(rep(0, n_folds))
    gb_var_G <- c(rep(0, n_folds))
    gb_var_E <- c(rep(0, n_folds))
  }

  for (i in seq_len(n)) {
    message(paste("GO#", i))
    goid <- c(rep(names(temp1)[i], n_folds))

    if (length(temp1[[i]]) > 0) {
      gfb_pred <- c(temp1[[i]]$accuracy$Corr)
      m <- as.matrix(temp1[[i]]$theta)
      gfb_var_Gf <- apply(m, 1, function(arg) {
        arg[1] / sum(arg)
      })
      gfb_var_rGf <- apply(m, 1, function(arg) {
        arg[2] / sum(arg)
      })
      gfb_var_E <- apply(m, 1, function(arg) {
        arg[3] / sum(arg)
      })
      gfb_h2f <- (gfb_var_Gf) / (gfb_var_Gf + gfb_var_rGf + gfb_var_E)
    } else {
      # Failed GFBLUP fit for this GO term: report zeros.
      gfb_pred <- c(rep(0, n_folds))
      gfb_var_Gf <- c(rep(0, n_folds))
      gfb_var_rGf <- c(rep(0, n_folds))
      gfb_h2f <- c(rep(0, n_folds))
      gfb_var_E <- c(rep(0, n_folds))
    }

    # Column names come from the variable names, exactly as before.
    pieces[[(r - 1) * n + i]] <- data.frame(time, gb_pred, gb_var_G, gb_var_E, goid, gfb_pred, gfb_var_Gf, gfb_var_rGf, gfb_var_E, gfb_h2f)
  }
}

if (length(pieces) > 0) {
  res <- do.call(rbind, pieces)
}

+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +saveRDS(res,paste0(t,"_all_go_cv_results.rds")) +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ No newline at end of file diff --git a/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..f7414119db8f4c4093acf20707b3c4f0a78699ca --- /dev/null +++ b/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_1001_2000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") 
# ---- Input data, continued -------------------------------------------------
# Every input in this section is a single-object .rds file. The table maps the
# variable to create to the file it is read from; files are read in the listed
# order.
rds_inputs <- c(
  all_nucl_genes_bed = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds",
  ath_all_new_maf_ldpruned_map = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds",
  Pheno = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds",
  geno_pheno = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds",
  MAC_df = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds",
  pheno_df = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds",
  W = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds",
  # GO-term level inputs (original note: 7432 entries each).
  go_all_genes_number = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds",
  go_all_markers_number = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds",
  go_all_markers = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds",
  # Per-GO-term objects (original note: 7297 entries, GO terms with zero
  # markers excluded).
  markerSets = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds",
  setsGF = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds",
  rsetsGF = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds",
  nsets = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds",
  nsnps = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds",
  GF = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds",
  rGF = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds"
)
for (obj_name in names(rds_inputs)) {
  assign(obj_name, readRDS(rds_inputs[[obj_name]]))
}
# Drop the loop helpers so only the loaded objects remain defined.
rm(rds_inputs, obj_name)
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H1_1_gblup_variances_all=rep(list(list()),cycles) +H1_1_gblup_prediction_all=rep(list(list()),cycles) +H1_1_gfblup_variances_all=rep(list(list()),cycles) +H1_1_gfblup_prediction_all=rep(list(list()),cycles) +H1_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H1_1...") + y=1000000*pheno_df$H1_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H1_1_gblup_variances_all[[r]]<-var + 
H1_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H1_1_gblup_variances_all[[r]]<-list() + H1_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H1_1_gfblup_variances_all[[r]]<-var + H1_1_gfblup_prediction_all[[r]]<-pred + H1_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H1_1_gblup_variances_all,"H1_1_gblup_variances_all_1001_2000.rds") +saveRDS(H1_1_gblup_prediction_all,"H1_1_gblup_prediction_all_1001_2000.rds") +saveRDS(H1_1_gfblup_variances_all,"H1_1_gfblup_variances_all_1001_2000.rds") +saveRDS(H1_1_gfblup_prediction_all,"H1_1_gfblup_prediction_all_1001_2000.rds") +saveRDS(H1_1_gfblup_validate_all,"H1_1_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..2ab581f284bf0dddb90aae6bffa471ddfa6d4b7a --- /dev/null +++ b/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_1_1000.R @@ -0,0 +1,130 @@ +############################# 
# FUNCTIONS ##############################################

# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

# All relative output paths used later in the script resolve against this
# directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# if (!is.installed("backports"))
# {
#   install.packages("backports")
# }
# if (!is.installed("devtools"))
# {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg"))
# {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep only the GO terms that have at least one marker, then take the slice of
# terms handled by this script.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE);
# Validation sets saved by the plain GBLUP run; indexed by cycle below.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle. A slot stays an empty list when the fit for it fails.
H1_1_gblup_variances_all <- rep(list(list()), cycles)
H1_1_gblup_prediction_all <- rep(list(list()), cycles)
H1_1_gfblup_variances_all <- rep(list(list()), cycles)
H1_1_gfblup_prediction_all <- rep(list(list()), cycles)
H1_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The folds are taken from the stored GBLUP validation sets rather than
  # re-sampled here (the former set.seed()/sample() result was never used).
  # NOTE(review): unlist() flattens a matrix column by column while
  # byrow = TRUE refills it row by row; confirm this reproduces the fold
  # layout that the GBLUP run saved.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("H1_1...")
  y <- 1000000 * pheno_df$H1_1
  # NOTE(review): the response also appears on the right-hand side;
  # presumably an intercept-only design (y ~ 1) was intended -- confirm.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Assignments made inside a tryCatch() handler only modify the handler's
  # local copies, so the fallback is returned from tryCatch() and stored
  # afterwards instead.
  gblup_fit <- tryCatch(
    {
      fit_variances <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      fit_prediction <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
      list(variances = fit_variances, prediction = fit_prediction)
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(variances = list(), prediction = list())
    }
  )
  H1_1_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
  H1_1_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_variances <- rep(list(list()), length(GF_filtered))
  gf_predictions <- rep(list(list()), length(GF_filtered))
  names(gf_variances) <- names(GF_filtered)
  names(gf_predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the k-th entry of GF_filtered and
    # the k-th entry of rGF_filtered.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    gf_fit <- tryCatch(
      {
        fit_variances <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
        fit_prediction <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
        list(variances = fit_variances, prediction = fit_prediction)
      },
      error = function(e) {
        message("GFBLUP failed in cycle ", r, ", GO# ", k, ": ", conditionMessage(e))
        # Clear both results so a variance fit never sits next to a failed
        # prediction fit.
        list(variances = list(), prediction = list())
      }
    )
    gf_variances[[k]] <- gf_fit[["variances"]]
    gf_predictions[[k]] <- gf_fit[["prediction"]]
  }
  H1_1_gfblup_variances_all[[r]] <- gf_variances
  H1_1_gfblup_prediction_all[[r]] <- gf_predictions
  H1_1_gfblup_validate_all[[r]] <- list(validate)
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(H1_1_gblup_variances_all, "H1_1_gblup_variances_all_1_1000.rds")
saveRDS(H1_1_gblup_prediction_all, "H1_1_gblup_prediction_all_1_1000.rds")
saveRDS(H1_1_gfblup_variances_all, "H1_1_gfblup_variances_all_1_1000.rds")
saveRDS(H1_1_gfblup_prediction_all, "H1_1_gfblup_prediction_all_1_1000.rds")
saveRDS(H1_1_gfblup_validate_all, "H1_1_gfblup_validate_all_1_1000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_2001_3000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..a4786011a53e67f56c98e29f4123eb15e3b90eb5
# --- /dev/null
# +++ b/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_2001_3000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################

# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# All relative output paths below resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# if (!is.installed("backports"))
# {
#   install.packages("backports")
# }
# if (!is.installed("devtools"))
# {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg"))
# {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep only the GO terms that have at least one marker, then take the slice of
# terms handled by this script.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[2001:3000]
rGF_filtered <- rGF_filtered[2001:3000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets saved by the plain GBLUP run; indexed by cycle below.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle. A slot stays an empty list when the fit for it fails.
H1_1_gblup_variances_all <- rep(list(list()), cycles)
H1_1_gblup_prediction_all <- rep(list(list()), cycles)
H1_1_gfblup_variances_all <- rep(list(list()), cycles)
H1_1_gfblup_prediction_all <- rep(list(list()), cycles)
H1_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The folds are taken from the stored GBLUP validation sets rather than
  # re-sampled here (the former set.seed()/sample() result was never used).
  # NOTE(review): unlist() flattens a matrix column by column while
  # byrow = TRUE refills it row by row; confirm this reproduces the fold
  # layout that the GBLUP run saved.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("H1_1...")
  y <- 1000000 * pheno_df$H1_1
  # NOTE(review): the response also appears on the right-hand side;
  # presumably an intercept-only design (y ~ 1) was intended -- confirm.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Assignments made inside a tryCatch() handler only modify the handler's
  # local copies, so the fallback is returned from tryCatch() and stored
  # afterwards instead.
  gblup_fit <- tryCatch(
    {
      fit_variances <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      fit_prediction <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
      list(variances = fit_variances, prediction = fit_prediction)
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(variances = list(), prediction = list())
    }
  )
  H1_1_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
  H1_1_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_variances <- rep(list(list()), length(GF_filtered))
  gf_predictions <- rep(list(list()), length(GF_filtered))
  names(gf_variances) <- names(GF_filtered)
  names(gf_predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the k-th entry of GF_filtered and
    # the k-th entry of rGF_filtered.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    gf_fit <- tryCatch(
      {
        fit_variances <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
        fit_prediction <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
        list(variances = fit_variances, prediction = fit_prediction)
      },
      error = function(e) {
        message("GFBLUP failed in cycle ", r, ", GO# ", k, ": ", conditionMessage(e))
        # Clear both results so a variance fit never sits next to a failed
        # prediction fit.
        list(variances = list(), prediction = list())
      }
    )
    gf_variances[[k]] <- gf_fit[["variances"]]
    gf_predictions[[k]] <- gf_fit[["prediction"]]
  }
  H1_1_gfblup_variances_all[[r]] <- gf_variances
  H1_1_gfblup_prediction_all[[r]] <- gf_predictions
  H1_1_gfblup_validate_all[[r]] <- list(validate)
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(H1_1_gblup_variances_all, "H1_1_gblup_variances_all_2001_3000.rds")
saveRDS(H1_1_gblup_prediction_all, "H1_1_gblup_prediction_all_2001_3000.rds")
saveRDS(H1_1_gfblup_variances_all, "H1_1_gfblup_variances_all_2001_3000.rds")
saveRDS(H1_1_gfblup_prediction_all, "H1_1_gfblup_prediction_all_2001_3000.rds")
saveRDS(H1_1_gfblup_validate_all, "H1_1_gfblup_validate_all_2001_3000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_3001_4000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..6b7530fa1b48f89d90b5d0d4d963932606b64a9f
# --- /dev/null
# +++ b/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_3001_4000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################

# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

# All relative output paths used later in the script resolve against this
# directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# if (!is.installed("backports"))
# {
#   install.packages("backports")
# }
# if (!is.installed("devtools"))
# {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg"))
# {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep only the GO terms that have at least one marker, then take the slice of
# terms handled by this script.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets saved by the plain GBLUP run; indexed by cycle below.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle. A slot stays an empty list when the fit for it fails.
H1_1_gblup_variances_all <- rep(list(list()), cycles)
H1_1_gblup_prediction_all <- rep(list(list()), cycles)
H1_1_gfblup_variances_all <- rep(list(list()), cycles)
H1_1_gfblup_prediction_all <- rep(list(list()), cycles)
H1_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The folds are taken from the stored GBLUP validation sets rather than
  # re-sampled here (the former set.seed()/sample() result was never used).
  # NOTE(review): unlist() flattens a matrix column by column while
  # byrow = TRUE refills it row by row; confirm this reproduces the fold
  # layout that the GBLUP run saved.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("H1_1...")
  y <- 1000000 * pheno_df$H1_1
  # NOTE(review): the response also appears on the right-hand side;
  # presumably an intercept-only design (y ~ 1) was intended -- confirm.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Assignments made inside a tryCatch() handler only modify the handler's
  # local copies, so the fallback is returned from tryCatch() and stored
  # afterwards instead.
  gblup_fit <- tryCatch(
    {
      fit_variances <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      fit_prediction <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
      list(variances = fit_variances, prediction = fit_prediction)
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(variances = list(), prediction = list())
    }
  )
  H1_1_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
  H1_1_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_variances <- rep(list(list()), length(GF_filtered))
  gf_predictions <- rep(list(list()), length(GF_filtered))
  names(gf_variances) <- names(GF_filtered)
  names(gf_predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the k-th entry of GF_filtered and
    # the k-th entry of rGF_filtered.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    gf_fit <- tryCatch(
      {
        fit_variances <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
        fit_prediction <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
        list(variances = fit_variances, prediction = fit_prediction)
      },
      error = function(e) {
        message("GFBLUP failed in cycle ", r, ", GO# ", k, ": ", conditionMessage(e))
        # Clear both results so a variance fit never sits next to a failed
        # prediction fit.
        list(variances = list(), prediction = list())
      }
    )
    gf_variances[[k]] <- gf_fit[["variances"]]
    gf_predictions[[k]] <- gf_fit[["prediction"]]
  }
  H1_1_gfblup_variances_all[[r]] <- gf_variances
  H1_1_gfblup_prediction_all[[r]] <- gf_predictions
  H1_1_gfblup_validate_all[[r]] <- list(validate)
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(H1_1_gblup_variances_all, "H1_1_gblup_variances_all_3001_4000.rds")
saveRDS(H1_1_gblup_prediction_all, "H1_1_gblup_prediction_all_3001_4000.rds")
saveRDS(H1_1_gfblup_variances_all, "H1_1_gfblup_variances_all_3001_4000.rds")
saveRDS(H1_1_gfblup_prediction_all, "H1_1_gfblup_prediction_all_3001_4000.rds")
saveRDS(H1_1_gfblup_validate_all, "H1_1_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..fae5c0d4b040fce294e3db9b0951ce421d9f3ca9
# --- /dev/null
# +++ b/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_4001_5000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################

# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# All relative output paths used later in the script resolve against this
# directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# if (!is.installed("backports"))
# {
#   install.packages("backports")
# }
# if (!is.installed("devtools"))
# {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg"))
# {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep only the GO terms that have at least one marker, then take the slice of
# terms handled by this script.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets saved by the plain GBLUP run; indexed by cycle below.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle. A slot stays an empty list when the fit for it fails.
H1_1_gblup_variances_all <- rep(list(list()), cycles)
H1_1_gblup_prediction_all <- rep(list(list()), cycles)
H1_1_gfblup_variances_all <- rep(list(list()), cycles)
H1_1_gfblup_prediction_all <- rep(list(list()), cycles)
H1_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The folds are taken from the stored GBLUP validation sets rather than
  # re-sampled here (the former set.seed()/sample() result was never used).
  # NOTE(review): unlist() flattens a matrix column by column while
  # byrow = TRUE refills it row by row; confirm this reproduces the fold
  # layout that the GBLUP run saved.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("H1_1...")
  y <- 1000000 * pheno_df$H1_1
  # NOTE(review): the response also appears on the right-hand side;
  # presumably an intercept-only design (y ~ 1) was intended -- confirm.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Assignments made inside a tryCatch() handler only modify the handler's
  # local copies, so the fallback is returned from tryCatch() and stored
  # afterwards instead.
  gblup_fit <- tryCatch(
    {
      fit_variances <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      fit_prediction <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
      list(variances = fit_variances, prediction = fit_prediction)
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(variances = list(), prediction = list())
    }
  )
  H1_1_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
  H1_1_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_variances <- rep(list(list()), length(GF_filtered))
  gf_predictions <- rep(list(list()), length(GF_filtered))
  names(gf_variances) <- names(GF_filtered)
  names(gf_predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the k-th entry of GF_filtered and
    # the k-th entry of rGF_filtered.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    gf_fit <- tryCatch(
      {
        fit_variances <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
        fit_prediction <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
        list(variances = fit_variances, prediction = fit_prediction)
      },
      error = function(e) {
        message("GFBLUP failed in cycle ", r, ", GO# ", k, ": ", conditionMessage(e))
        # Clear both results so a variance fit never sits next to a failed
        # prediction fit.
        list(variances = list(), prediction = list())
      }
    )
    gf_variances[[k]] <- gf_fit[["variances"]]
    gf_predictions[[k]] <- gf_fit[["prediction"]]
  }
  H1_1_gfblup_variances_all[[r]] <- gf_variances
  H1_1_gfblup_prediction_all[[r]] <- gf_predictions
  H1_1_gfblup_validate_all[[r]] <- list(validate)
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(H1_1_gblup_variances_all, "H1_1_gblup_variances_all_4001_5000.rds")
saveRDS(H1_1_gblup_prediction_all, "H1_1_gblup_prediction_all_4001_5000.rds")
saveRDS(H1_1_gfblup_variances_all, "H1_1_gfblup_variances_all_4001_5000.rds")
saveRDS(H1_1_gfblup_prediction_all, "H1_1_gfblup_prediction_all_4001_5000.rds")
saveRDS(H1_1_gfblup_validate_all, "H1_1_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_5001_6000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..3f3ecffc1ec0a808d9a02d456cb346e2e8df3ef5
# --- /dev/null
# +++ b/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_5001_6000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################

# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# All relative output paths used later in the script resolve against this
# directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# if (!is.installed("backports"))
# {
#   install.packages("backports")
# }
# if (!is.installed("devtools"))
# {
#   install.packages("devtools")
# }
# library("devtools")
# if (!is.installed("qgg"))
# {
#   options(devtools.install.args=" --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep only the GO terms that have at least one marker, then take the slice of
# terms handled by this script.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets saved by the plain GBLUP run; indexed by cycle below.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle. A slot stays an empty list when the fit for it fails.
H1_1_gblup_variances_all <- rep(list(list()), cycles)
H1_1_gblup_prediction_all <- rep(list(list()), cycles)
H1_1_gfblup_variances_all <- rep(list(list()), cycles)
H1_1_gfblup_prediction_all <- rep(list(list()), cycles)
H1_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The folds are taken from the stored GBLUP validation sets rather than
  # re-sampled here (the former set.seed()/sample() result was never used).
  # NOTE(review): unlist() flattens a matrix column by column while
  # byrow = TRUE refills it row by row; confirm this reproduces the fold
  # layout that the GBLUP run saved.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("H1_1...")
  y <- 1000000 * pheno_df$H1_1
  # NOTE(review): the response also appears on the right-hand side;
  # presumably an intercept-only design (y ~ 1) was intended -- confirm.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Assignments made inside a tryCatch() handler only modify the handler's
  # local copies, so the fallback is returned from tryCatch() and stored
  # afterwards instead.
  gblup_fit <- tryCatch(
    {
      fit_variances <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      fit_prediction <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
      list(variances = fit_variances, prediction = fit_prediction)
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(variances = list(), prediction = list())
    }
  )
  H1_1_gblup_variances_all[[r]] <- gblup_fit[["variances"]]
  H1_1_gblup_prediction_all[[r]] <- gblup_fit[["prediction"]]
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_variances <- rep(list(list()), length(GF_filtered))
  gf_predictions <- rep(list(list()), length(GF_filtered))
  names(gf_variances) <- names(GF_filtered)
  names(gf_predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the k-th entry of GF_filtered and
    # the k-th entry of rGF_filtered.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    gf_fit <- tryCatch(
      {
        fit_variances <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
        fit_prediction <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
        list(variances = fit_variances, prediction = fit_prediction)
      },
      error = function(e) {
        message("GFBLUP failed in cycle ", r, ", GO# ", k, ": ", conditionMessage(e))
        # Clear both results so a variance fit never sits next to a failed
        # prediction fit.
        list(variances = list(), prediction = list())
      }
    )
    gf_variances[[k]] <- gf_fit[["variances"]]
    gf_predictions[[k]] <- gf_fit[["prediction"]]
  }
  H1_1_gfblup_variances_all[[r]] <- gf_variances
  H1_1_gfblup_prediction_all[[r]] <- gf_predictions
  H1_1_gfblup_validate_all[[r]] <- list(validate)
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(H1_1_gblup_variances_all, "H1_1_gblup_variances_all_5001_6000.rds")
saveRDS(H1_1_gblup_prediction_all, "H1_1_gblup_prediction_all_5001_6000.rds")
saveRDS(H1_1_gfblup_variances_all, "H1_1_gfblup_variances_all_5001_6000.rds")
saveRDS(H1_1_gfblup_prediction_all, "H1_1_gfblup_prediction_all_5001_6000.rds")
saveRDS(H1_1_gfblup_validate_all, "H1_1_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################
# Patch metadata for the next script (kept verbatim, commented out so this text stays valid R):
# diff --git a/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_6001_7297.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..c03230a712bc072b880abf95001486667f5dbca6
# --- /dev/null
# +++ b/code/using_GO/psii/H1_1_gblup_vs_gfblup_using_go_model_6001_7297.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negated `%in%`. Base `Negate()` is used so that defining the operator does
# not require the purrr package to be installed.
`%not_in%` <- Negate(`%in%`)

# Check whether each package named in `mypkg` is installed.
# Returns an unnamed logical vector, one element per name.
# `system.file()` returns "" for a package that cannot be found; this avoids
# scanning the whole library with `installed.packages()`, which its own
# documentation advises against for this purpose.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
########################################################################################

# The result files written at the end of this script use relative names, so
# they are created inside this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# One-time setup, kept for reference and intentionally not executed here:
#   if (!is.installed("backports")) install.packages("backports")
#   if (!is.installed("devtools")) install.packages("devtools")
#   library("devtools")
#   if (!is.installed("qgg")) {
#     options(devtools.install.args = " --no-multiargs")
#     devtools::install_github("psoerensen/qgg")
#   }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
# Directory holding the shared input objects.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
# Remaining input objects for this script. The two directories are named once
# instead of being repeated in every call.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))

# GO-term tables; the original comments record 7432 entries for each.
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))

# Per-GO-term objects; the original comments record 7297 entries each
# (GO terms with zero markers excluded).
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))

# load() restores objects under their saved names; presumably this is what
# defines `G`, which the GBLUP step of this script reads - confirm.
load(file = file.path(data_dir, "G.Rdata"))
########################################################################################
# Restrict the GO feature sets to terms that have at least one marker.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# This job handles GO sets 6001..n. `6001:n` would silently count downwards
# if n were smaller than 6001, so check before slicing.
stopifnot(n >= 6001)
GF_filtered <- GF_filtered[6001:n]
rGF_filtered <- rGF_filtered[6001:n]
###########################################################################################
cycles <- 10
n_folds <- 8
# Validation sets are read from a stored GBLUP run and indexed by cycle below.
# (The clock-seeded random fold assignment of the original was never used and
# has been removed.)
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
stopifnot(length(gblup_validate) >= cycles)

# One slot per cycle; a slot stays an empty list when the fit for it fails.
H1_1_gblup_variances_all <- rep(list(list()), cycles)
H1_1_gblup_prediction_all <- rep(list(list()), cycles)
H1_1_gfblup_variances_all <- rep(list(list()), cycles)
H1_1_gfblup_prediction_all <- rep(list(list()), cycles)
H1_1_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not change between cycles.
y <- 1000000 * pheno_df$H1_1
# NOTE(review): `1*y` puts the response on the right-hand side; presumably an
# intercept-only design (`y ~ 1`) was intended - confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  message("H1_1...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both results are stored only if both fits succeed. On failure the slots keep
  # their empty-list defaults and the error is reported instead of being dropped.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H1_1_gblup_variances_all[[r]] <- gblup_var
    H1_1_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- setNames(rep(list(list()), length(GF_filtered)), names(GF_filtered))
  gf_pred <- gf_var
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the GO-set one and its counterpart from rGF.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
    }, error = function(e) {
      # Assigning to the result lists from inside a handler only changes a local
      # copy; the preallocated empty list is what marks a failed fit.
      message("GFBLUP failed for GO set ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  H1_1_gfblup_variances_all[[r]] <- gf_var
  H1_1_gfblup_prediction_all[[r]] <- gf_pred
  H1_1_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(H1_1_gblup_variances_all, "H1_1_gblup_variances_all_6001_7297.rds")
saveRDS(H1_1_gblup_prediction_all, "H1_1_gblup_prediction_all_6001_7297.rds")
saveRDS(H1_1_gfblup_variances_all, "H1_1_gfblup_variances_all_6001_7297.rds")
saveRDS(H1_1_gfblup_prediction_all, "H1_1_gfblup_prediction_all_6001_7297.rds")
saveRDS(H1_1_gfblup_validate_all, "H1_1_gfblup_validate_all_6001_7297.rds")

##################################################################################################################################
# Patch metadata for the next script (kept verbatim, commented out so this text stays valid R):
# diff --git a/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_1001_2000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..ed128ea2cd9ac0fcfda548e0f0d614466a1c177c
# --- /dev/null
# +++ b/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_1001_2000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negated `%in%`; base `Negate()` avoids requiring purrr just for this.
`%not_in%` <- Negate(`%in%`)

# Check whether each package named in `mypkg` is installed (unnamed logical
# vector). `system.file()` returns "" for a package that cannot be found.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
########################################################################################

# Relative output names used by saveRDS() later resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# One-time setup, kept for reference and intentionally not executed here:
#   if (!is.installed("backports")) install.packages("backports")
#   if (!is.installed("devtools")) install.packages("devtools")
#   library("devtools")
#   if (!is.installed("qgg")) {
#     options(devtools.install.args = " --no-multiargs")
#     devtools::install_github("psoerensen/qgg")
#   }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) # 7432
# Remaining GO inputs for this script.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) # 7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) # 7432
# The original comments record 7297 entries for each object below
# (GO terms with zero markers excluded).
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
# Presumably defines `G`, which the GBLUP step below reads - confirm.
load(file = file.path(data_dir, "G.Rdata"))
########################################################################################
# Restrict the GO feature sets to terms that have at least one marker.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# This job handles GO sets 1001..2000. Indexing past the end of a list yields
# NULL placeholders, so fail fast if the slice is not fully available.
stopifnot(length(GF_filtered) >= 2000, length(rGF_filtered) >= 2000)
GF_filtered <- GF_filtered[1001:2000]
rGF_filtered <- rGF_filtered[1001:2000]
###########################################################################################
cycles <- 10
n_folds <- 8
# Validation sets are read from a stored GBLUP run and indexed by cycle below.
# (The clock-seeded random fold assignment of the original was never used and
# has been removed.)
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
stopifnot(length(gblup_validate) >= cycles)

# One slot per cycle; a slot stays an empty list when the fit for it fails.
H1_2_gblup_variances_all <- rep(list(list()), cycles)
H1_2_gblup_prediction_all <- rep(list(list()), cycles)
H1_2_gfblup_variances_all <- rep(list(list()), cycles)
H1_2_gfblup_prediction_all <- rep(list(list()), cycles)
H1_2_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not change between cycles.
y <- 1000000 * pheno_df$H1_2
# NOTE(review): `1*y` puts the response on the right-hand side; presumably an
# intercept-only design (`y ~ 1`) was intended - confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  message("H1_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both results are stored only if both fits succeed. On failure the slots keep
  # their empty-list defaults and the error is reported instead of being dropped.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H1_2_gblup_variances_all[[r]] <- gblup_var
    H1_2_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- setNames(rep(list(list()), length(GF_filtered)), names(GF_filtered))
  gf_pred <- gf_var
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the GO-set one and its counterpart from rGF.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
    }, error = function(e) {
      # Assigning to the result lists from inside a handler only changes a local
      # copy; the preallocated empty list is what marks a failed fit.
      message("GFBLUP failed for GO set ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  H1_2_gfblup_variances_all[[r]] <- gf_var
  H1_2_gfblup_prediction_all[[r]] <- gf_pred
  H1_2_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(H1_2_gblup_variances_all, "H1_2_gblup_variances_all_1001_2000.rds")
saveRDS(H1_2_gblup_prediction_all, "H1_2_gblup_prediction_all_1001_2000.rds")
saveRDS(H1_2_gfblup_variances_all, "H1_2_gfblup_variances_all_1001_2000.rds")
saveRDS(H1_2_gfblup_prediction_all, "H1_2_gfblup_prediction_all_1001_2000.rds")
saveRDS(H1_2_gfblup_validate_all, "H1_2_gfblup_validate_all_1001_2000.rds")

##################################################################################################################################
# Patch metadata for the next script (kept verbatim, commented out so this text stays valid R):
# diff --git a/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_1_1000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..8d029e1dacb389c7db7a44415706a7faa16b8663
# --- /dev/null
# +++ b/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_1_1000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negated `%in%`; base `Negate()` avoids requiring purrr just for this.
`%not_in%` <- Negate(`%in%`)

# Check whether each package named in `mypkg` is installed (unnamed logical
# vector). `system.file()` returns "" for a package that cannot be found.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
########################################################################################

# Relative output names used by saveRDS() later resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# One-time setup, kept for reference and intentionally not executed here:
#   if (!is.installed("backports")) install.packages("backports")
#   if (!is.installed("devtools")) install.packages("devtools")
#   library("devtools")
#   if (!is.installed("qgg")) {
#     options(devtools.install.args = " --no-multiargs")
#     devtools::install_github("psoerensen/qgg")
#   }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
# The two input directories are named once instead of being repeated per call.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# GO-term tables; the original comments record 7432 entries for each.
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
# The original comments record 7297 entries for each object below
# (GO terms with zero markers excluded).
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
# Presumably defines `G`, which the GBLUP step below reads - confirm.
load(file = file.path(data_dir, "G.Rdata"))
########################################################################################
# Restrict the GO feature sets to terms that have at least one marker.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# This job handles GO sets 1..1000. Indexing past the end of a list yields
# NULL placeholders, so fail fast if the slice is not fully available.
stopifnot(length(GF_filtered) >= 1000, length(rGF_filtered) >= 1000)
GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]
###########################################################################################
cycles <- 10
n_folds <- 8
# Validation sets are read from a stored GBLUP run and indexed by cycle below.
# (The clock-seeded random fold assignment of the original was never used and
# has been removed.)
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
stopifnot(length(gblup_validate) >= cycles)

# One slot per cycle; a slot stays an empty list when the fit for it fails.
H1_2_gblup_variances_all <- rep(list(list()), cycles)
H1_2_gblup_prediction_all <- rep(list(list()), cycles)
H1_2_gfblup_variances_all <- rep(list(list()), cycles)
H1_2_gfblup_prediction_all <- rep(list(list()), cycles)
H1_2_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not change between cycles.
y <- 1000000 * pheno_df$H1_2
# NOTE(review): `1*y` puts the response on the right-hand side; presumably an
# intercept-only design (`y ~ 1`) was intended - confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  message("H1_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both results are stored only if both fits succeed. On failure the slots keep
  # their empty-list defaults and the error is reported instead of being dropped.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H1_2_gblup_variances_all[[r]] <- gblup_var
    H1_2_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- setNames(rep(list(list()), length(GF_filtered)), names(GF_filtered))
  gf_pred <- gf_var
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the GO-set one and its counterpart from rGF.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
    }, error = function(e) {
      # Assigning to the result lists from inside a handler only changes a local
      # copy; the preallocated empty list is what marks a failed fit.
      message("GFBLUP failed for GO set ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  H1_2_gfblup_variances_all[[r]] <- gf_var
  H1_2_gfblup_prediction_all[[r]] <- gf_pred
  H1_2_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(H1_2_gblup_variances_all, "H1_2_gblup_variances_all_1_1000.rds")
saveRDS(H1_2_gblup_prediction_all, "H1_2_gblup_prediction_all_1_1000.rds")
saveRDS(H1_2_gfblup_variances_all, "H1_2_gfblup_variances_all_1_1000.rds")
saveRDS(H1_2_gfblup_prediction_all, "H1_2_gfblup_prediction_all_1_1000.rds")
saveRDS(H1_2_gfblup_validate_all, "H1_2_gfblup_validate_all_1_1000.rds")

##################################################################################################################################
# Patch metadata for the next script (kept verbatim, commented out so this text stays valid R):
# diff --git a/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_2001_3000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..3866ca91cf7d8e68fb311db762b8a36391953aec
# --- /dev/null
# +++ b/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_2001_3000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negated `%in%`. Base `Negate()` is used so that defining the operator does
# not require the purrr package to be installed.
`%not_in%` <- Negate(`%in%`)

# Check whether each package named in `mypkg` is installed.
# Returns an unnamed logical vector, one element per name.
# `system.file()` returns "" for a package that cannot be found; this avoids
# scanning the whole library with `installed.packages()`, which its own
# documentation advises against for this purpose.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
########################################################################################

# The result files written at the end of this script use relative names, so
# they are created inside this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# One-time setup, kept for reference and intentionally not executed here:
#   if (!is.installed("backports")) install.packages("backports")
#   if (!is.installed("devtools")) install.packages("devtools")
#   library("devtools")
#   if (!is.installed("qgg")) {
#     options(devtools.install.args = " --no-multiargs")
#     devtools::install_github("psoerensen/qgg")
#   }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
# Directory holding the shared input objects.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
# Remaining input objects for this script. The two directories are named once
# instead of being repeated in every call.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))

# GO-term tables; the original comments record 7432 entries for each.
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))

# Per-GO-term objects; the original comments record 7297 entries each
# (GO terms with zero markers excluded).
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))

# load() restores objects under their saved names; presumably this is what
# defines `G`, which the GBLUP step of this script reads - confirm.
load(file = file.path(data_dir, "G.Rdata"))
########################################################################################
# Restrict the GO feature sets to terms that have at least one marker.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# This job handles GO sets 2001..3000. Indexing past the end of a list yields
# NULL placeholders, so fail fast if the slice is not fully available.
stopifnot(length(GF_filtered) >= 3000, length(rGF_filtered) >= 3000)
GF_filtered <- GF_filtered[2001:3000]
rGF_filtered <- rGF_filtered[2001:3000]
###########################################################################################
cycles <- 10
n_folds <- 8
# Validation sets are read from a stored GBLUP run and indexed by cycle below.
# (The clock-seeded random fold assignment of the original was never used and
# has been removed.)
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
stopifnot(length(gblup_validate) >= cycles)

# One slot per cycle; a slot stays an empty list when the fit for it fails.
H1_2_gblup_variances_all <- rep(list(list()), cycles)
H1_2_gblup_prediction_all <- rep(list(list()), cycles)
H1_2_gfblup_variances_all <- rep(list(list()), cycles)
H1_2_gfblup_prediction_all <- rep(list(list()), cycles)
H1_2_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not change between cycles.
y <- 1000000 * pheno_df$H1_2
# NOTE(review): `1*y` puts the response on the right-hand side; presumably an
# intercept-only design (`y ~ 1`) was intended - confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  message("H1_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both results are stored only if both fits succeed. On failure the slots keep
  # their empty-list defaults and the error is reported instead of being dropped.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H1_2_gblup_variances_all[[r]] <- gblup_var
    H1_2_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- setNames(rep(list(list()), length(GF_filtered)), names(GF_filtered))
  gf_pred <- gf_var
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the GO-set one and its counterpart from rGF.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
    }, error = function(e) {
      # Assigning to the result lists from inside a handler only changes a local
      # copy; the preallocated empty list is what marks a failed fit.
      message("GFBLUP failed for GO set ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  H1_2_gfblup_variances_all[[r]] <- gf_var
  H1_2_gfblup_prediction_all[[r]] <- gf_pred
  H1_2_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(H1_2_gblup_variances_all, "H1_2_gblup_variances_all_2001_3000.rds")
saveRDS(H1_2_gblup_prediction_all, "H1_2_gblup_prediction_all_2001_3000.rds")
saveRDS(H1_2_gfblup_variances_all, "H1_2_gfblup_variances_all_2001_3000.rds")
saveRDS(H1_2_gfblup_prediction_all, "H1_2_gfblup_prediction_all_2001_3000.rds")
saveRDS(H1_2_gfblup_validate_all, "H1_2_gfblup_validate_all_2001_3000.rds")

##################################################################################################################################
# Patch metadata for the next script (kept verbatim, commented out so this text stays valid R):
# diff --git a/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_3001_4000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..56a7d65c4bdd34966f0972512be1aef2b1692dfa
# --- /dev/null
# +++ b/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_3001_4000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negated `%in%`; base `Negate()` avoids requiring purrr just for this.
`%not_in%` <- Negate(`%in%`)

# Check whether each package named in `mypkg` is installed (unnamed logical
# vector). `system.file()` returns "" for a package that cannot be found.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
########################################################################################

# Relative output names used by saveRDS() later resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# One-time setup, kept for reference and intentionally not executed here:
#   if (!is.installed("backports")) install.packages("backports")
#   if (!is.installed("devtools")) install.packages("devtools")
#   library("devtools")
#   if (!is.installed("qgg")) {
#     options(devtools.install.args = " --no-multiargs")
#     devtools::install_github("psoerensen/qgg")
#   }
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) # 7432
# Remaining GO inputs for this script.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) # 7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) # 7432
# The original comments record 7297 entries for each object below
# (GO terms with zero markers excluded).
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
# Presumably defines `G`, which the GBLUP step below reads - confirm.
load(file = file.path(data_dir, "G.Rdata"))
########################################################################################
# Restrict the GO feature sets to terms that have at least one marker.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# This job handles GO sets 3001..4000. Indexing past the end of a list yields
# NULL placeholders, so fail fast if the slice is not fully available.
stopifnot(length(GF_filtered) >= 4000, length(rGF_filtered) >= 4000)
GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]
###########################################################################################
cycles <- 10
n_folds <- 8
# Validation sets are read from a stored GBLUP run and indexed by cycle below.
# (The clock-seeded random fold assignment of the original was never used and
# has been removed.)
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
stopifnot(length(gblup_validate) >= cycles)

# One slot per cycle; a slot stays an empty list when the fit for it fails.
H1_2_gblup_variances_all <- rep(list(list()), cycles)
H1_2_gblup_prediction_all <- rep(list(list()), cycles)
H1_2_gfblup_variances_all <- rep(list(list()), cycles)
H1_2_gfblup_prediction_all <- rep(list(list()), cycles)
H1_2_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not change between cycles.
y <- 1000000 * pheno_df$H1_2
# NOTE(review): `1*y` puts the response on the right-hand side; presumably an
# intercept-only design (`y ~ 1`) was intended - confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  message("H1_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both results are stored only if both fits succeed. On failure the slots keep
  # their empty-list defaults and the error is reported instead of being dropped.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H1_2_gblup_variances_all[[r]] <- gblup_var
    H1_2_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- setNames(rep(list(list()), length(GF_filtered)), names(GF_filtered))
  gf_pred <- gf_var
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the GO-set one and its counterpart from rGF.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
    }, error = function(e) {
      # Assigning to the result lists from inside a handler only changes a local
      # copy; the preallocated empty list is what marks a failed fit.
      message("GFBLUP failed for GO set ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  H1_2_gfblup_variances_all[[r]] <- gf_var
  H1_2_gfblup_prediction_all[[r]] <- gf_pred
  H1_2_gfblup_validate_all[[r]] <- list(validate)
}
saveRDS(H1_2_gblup_variances_all, "H1_2_gblup_variances_all_3001_4000.rds")
saveRDS(H1_2_gblup_prediction_all, "H1_2_gblup_prediction_all_3001_4000.rds")
saveRDS(H1_2_gfblup_variances_all, "H1_2_gfblup_variances_all_3001_4000.rds")
saveRDS(H1_2_gfblup_prediction_all, "H1_2_gfblup_prediction_all_3001_4000.rds")
saveRDS(H1_2_gfblup_validate_all, "H1_2_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
# Patch metadata for the next script (kept verbatim, commented out so this text stays valid R):
# diff --git a/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..6a43dc7018cd19a02f6c68fec4b9dc13f5cadd2a
# --- /dev/null
# +++ b/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_4001_5000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of `x`.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negated `%in%`; base `Negate()` avoids requiring purrr just for this.
`%not_in%` <- Negate(`%in%`)

# Check whether each package named in `mypkg` is installed (unnamed logical
# vector). `system.file()` returns "" for a package that cannot be found.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
########################################################################################

# Relative output names used by saveRDS() later resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# One-time setup, kept for reference and intentionally not executed here:
#   if (!is.installed("backports")) install.packages("backports")
#   if (!is.installed("devtools")) install.packages("devtools")
#   library("devtools")
#   if (!is.installed("qgg")) {
#     options(devtools.install.args = " --no-multiargs")
#     devtools::install_github("psoerensen/qgg")
#   }
message("Loading required packages...")
library(GO.db)
+library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms 
with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H1_2_gblup_variances_all=rep(list(list()),cycles) +H1_2_gblup_prediction_all=rep(list(list()),cycles) +H1_2_gfblup_variances_all=rep(list(list()),cycles) +H1_2_gfblup_prediction_all=rep(list(list()),cycles) +H1_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H1_2...") + y=1000000*pheno_df$H1_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H1_2_gblup_variances_all[[r]]<-var + H1_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H1_2_gblup_variances_all[[r]]<-list() + H1_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H1_2_gfblup_variances_all[[r]]<-var + H1_2_gfblup_prediction_all[[r]]<-pred + H1_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H1_2_gblup_variances_all,"H1_2_gblup_variances_all_4001_5000.rds") +saveRDS(H1_2_gblup_prediction_all,"H1_2_gblup_prediction_all_4001_5000.rds") +saveRDS(H1_2_gfblup_variances_all,"H1_2_gfblup_variances_all_4001_5000.rds") +saveRDS(H1_2_gfblup_prediction_all,"H1_2_gfblup_prediction_all_4001_5000.rds") +saveRDS(H1_2_gfblup_validate_all,"H1_2_gfblup_validate_all_4001_5000.rds") + 
+################################################################################################################################## diff --git a/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..ab39bc00e803d7d55882cc9d62a069c3819d373b --- /dev/null +++ b/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_5001_6000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") 
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H1_2_gblup_variances_all=rep(list(list()),cycles) +H1_2_gblup_prediction_all=rep(list(list()),cycles) +H1_2_gfblup_variances_all=rep(list(list()),cycles) +H1_2_gfblup_prediction_all=rep(list(list()),cycles) +H1_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H1_2...") + y=1000000*pheno_df$H1_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H1_2_gblup_variances_all[[r]]<-var + H1_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
H1_2_gblup_variances_all[[r]]<-list() + H1_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H1_2_gfblup_variances_all[[r]]<-var + H1_2_gfblup_prediction_all[[r]]<-pred + H1_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H1_2_gblup_variances_all,"H1_2_gblup_variances_all_5001_6000.rds") +saveRDS(H1_2_gblup_prediction_all,"H1_2_gblup_prediction_all_5001_6000.rds") +saveRDS(H1_2_gfblup_variances_all,"H1_2_gfblup_variances_all_5001_6000.rds") +saveRDS(H1_2_gfblup_prediction_all,"H1_2_gfblup_prediction_all_5001_6000.rds") +saveRDS(H1_2_gfblup_validate_all,"H1_2_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..a45ff72bdedca038fbe42583d6306c7f9c83c13a --- /dev/null +++ b/code/using_GO/psii/H1_2_gblup_vs_gfblup_using_go_model_6001_7297.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim 
<- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[6001:n] +rGF_filtered<-rGF_filtered[6001:n] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 
+ +H1_2_gblup_variances_all=rep(list(list()),cycles) +H1_2_gblup_prediction_all=rep(list(list()),cycles) +H1_2_gfblup_variances_all=rep(list(list()),cycles) +H1_2_gfblup_prediction_all=rep(list(list()),cycles) +H1_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H1_2...") + y=1000000*pheno_df$H1_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H1_2_gblup_variances_all[[r]]<-var + H1_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H1_2_gblup_variances_all[[r]]<-list() + H1_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + 
}) #try catch ends + } + H1_2_gfblup_variances_all[[r]]<-var + H1_2_gfblup_prediction_all[[r]]<-pred + H1_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H1_2_gblup_variances_all,"H1_2_gblup_variances_all_6001_7297.rds") +saveRDS(H1_2_gblup_prediction_all,"H1_2_gblup_prediction_all_6001_7297.rds") +saveRDS(H1_2_gfblup_variances_all,"H1_2_gfblup_variances_all_6001_7297.rds") +saveRDS(H1_2_gfblup_prediction_all,"H1_2_gfblup_prediction_all_6001_7297.rds") +saveRDS(H1_2_gfblup_validate_all,"H1_2_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..f1d8fc9c6c7893922e32d53e32186e066babb03c --- /dev/null +++ b/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_1001_2000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
+library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms 
with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H1_3_gblup_variances_all=rep(list(list()),cycles) +H1_3_gblup_prediction_all=rep(list(list()),cycles) +H1_3_gfblup_variances_all=rep(list(list()),cycles) +H1_3_gfblup_prediction_all=rep(list(list()),cycles) +H1_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H1_3...") + y=1000000*pheno_df$H1_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H1_3_gblup_variances_all[[r]]<-var + H1_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H1_3_gblup_variances_all[[r]]<-list() + H1_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H1_3_gfblup_variances_all[[r]]<-var + H1_3_gfblup_prediction_all[[r]]<-pred + H1_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H1_3_gblup_variances_all,"H1_3_gblup_variances_all_1001_2000.rds") +saveRDS(H1_3_gblup_prediction_all,"H1_3_gblup_prediction_all_1001_2000.rds") +saveRDS(H1_3_gfblup_variances_all,"H1_3_gfblup_variances_all_1001_2000.rds") +saveRDS(H1_3_gfblup_prediction_all,"H1_3_gfblup_prediction_all_1001_2000.rds") +saveRDS(H1_3_gfblup_validate_all,"H1_3_gfblup_validate_all_1001_2000.rds") + 
+################################################################################################################################## diff --git a/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..d3c719f744d644c462cc9378241d6fa0ac9bcbee --- /dev/null +++ b/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_1_1000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# Genotype / phenotype inputs shared by all GO-slice scripts.
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# GO-term priors.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
# load() restores objects by their saved names; the code below uses G from this file.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep GO terms with at least one marker, then take this script's slice (1-1000).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation folds read from the GBLUP output directory; indexed by cycle below.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for that cycle fails.
H1_3_gblup_variances_all <- rep(list(list()), cycles)
H1_3_gblup_prediction_all <- rep(list(list()), cycles)
H1_3_gfblup_variances_all <- rep(list(list()), cycles)
H1_3_gfblup_prediction_all <- rep(list(list()), cycles)
H1_3_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # folds_index is not used below (folds come from gblup_validate); the seeding
  # and draw are kept so the RNG state is the same as before this cleanup.
  a <- as.numeric(Sys.time())
  set.seed(a)
  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df)))
  # NOTE(review): unlist() is column-major while byrow = TRUE refills row-wise;
  # confirm this reproduces the fold columns as they were saved.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("H1_3...")
  y <- 1000000 * pheno_df$H1_3
  # NOTE(review): `1*y` puts the response on the right-hand side; presumably an
  # intercept-only design (y ~ 1) is intended - confirm before changing.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The fits are returned from tryCatch and stored here: assignments made inside
  # an error handler only change handler-local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    H1_3_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    H1_3_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  n_sets <- length(GF_filtered)
  gf_var <- setNames(rep(list(list()), n_sets), names(GF_filtered))
  gf_pred <- setNames(rep(list(list()), n_sets), names(GF_filtered))
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Both fits for a set are kept or dropped together, so a set never ends up
    # with variance components but no prediction (or the reverse).
    gf_fit <- tryCatch(
      {
        grm_pair <- c(GF_filtered[k], rGF_filtered[k])
        list(
          var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
          pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
        )
      },
      error = function(e) {
        message("GFBLUP failed in cycle ", r, ", GO set ", k, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  H1_3_gfblup_variances_all[[r]] <- gf_var
  H1_3_gfblup_prediction_all[[r]] <- gf_pred
  H1_3_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(H1_3_gblup_variances_all, "H1_3_gblup_variances_all_1_1000.rds")
saveRDS(H1_3_gblup_prediction_all, "H1_3_gblup_prediction_all_1_1000.rds")
saveRDS(H1_3_gfblup_variances_all, "H1_3_gfblup_variances_all_1_1000.rds")
saveRDS(H1_3_gfblup_prediction_all, "H1_3_gfblup_prediction_all_1_1000.rds")
saveRDS(H1_3_gfblup_validate_all, "H1_3_gfblup_validate_all_1_1000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_2001_3000.R
# new file mode 100644; index 0000000000000000000000000000000000000000..2899763606a3c2e1914f6aa2a8b24d5410cb80a0
# --- /dev/null ; +++ b/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_2001_3000.R ; @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negation of %in%, built with purrr::negate.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# Disabled one-off dependency installation:
#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
# load() restores objects by their saved names; the code below uses G from this file.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep GO terms with at least one marker, then take this script's slice (2001-3000).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[2001:3000]
rGF_filtered <- rGF_filtered[2001:3000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation folds read from the GBLUP output directory; indexed by cycle below.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for that cycle fails.
H1_3_gblup_variances_all <- rep(list(list()), cycles)
H1_3_gblup_prediction_all <- rep(list(list()), cycles)
H1_3_gfblup_variances_all <- rep(list(list()), cycles)
H1_3_gfblup_prediction_all <- rep(list(list()), cycles)
H1_3_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # folds_index is not used below (folds come from gblup_validate); the seeding
  # and draw are kept so the RNG state is the same as before this cleanup.
  a <- as.numeric(Sys.time())
  set.seed(a)
  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df)))
  # NOTE(review): unlist() is column-major while byrow = TRUE refills row-wise;
  # confirm this reproduces the fold columns as they were saved.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("H1_3...")
  y <- 1000000 * pheno_df$H1_3
  # NOTE(review): `1*y` puts the response on the right-hand side; presumably an
  # intercept-only design (y ~ 1) is intended - confirm before changing.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The fits are returned from tryCatch and stored here: assignments made inside
  # an error handler only change handler-local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    H1_3_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    H1_3_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  n_sets <- length(GF_filtered)
  gf_var <- setNames(rep(list(list()), n_sets), names(GF_filtered))
  gf_pred <- setNames(rep(list(list()), n_sets), names(GF_filtered))
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Both fits for a set are kept or dropped together, so a set never ends up
    # with variance components but no prediction (or the reverse).
    gf_fit <- tryCatch(
      {
        grm_pair <- c(GF_filtered[k], rGF_filtered[k])
        list(
          var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
          pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
        )
      },
      error = function(e) {
        message("GFBLUP failed in cycle ", r, ", GO set ", k, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  H1_3_gfblup_variances_all[[r]] <- gf_var
  H1_3_gfblup_prediction_all[[r]] <- gf_pred
  H1_3_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(H1_3_gblup_variances_all, "H1_3_gblup_variances_all_2001_3000.rds")
saveRDS(H1_3_gblup_prediction_all, "H1_3_gblup_prediction_all_2001_3000.rds")
saveRDS(H1_3_gfblup_variances_all, "H1_3_gfblup_variances_all_2001_3000.rds")
saveRDS(H1_3_gfblup_prediction_all, "H1_3_gfblup_prediction_all_2001_3000.rds")
saveRDS(H1_3_gfblup_validate_all, "H1_3_gfblup_validate_all_2001_3000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_3001_4000.R
# new file mode 100644; index 0000000000000000000000000000000000000000..0f27ff3e73e45d3648af4fba66d3b477f0279d9a
# --- /dev/null ; +++ b/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_3001_4000.R ; @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negation of %in%, built with purrr::negate.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# Disabled one-off dependency installation:
#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
# load() restores objects by their saved names; the code below uses G from this file.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep GO terms with at least one marker, then take this script's slice (3001-4000).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation folds read from the GBLUP output directory; indexed by cycle below.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for that cycle fails.
H1_3_gblup_variances_all <- rep(list(list()), cycles)
H1_3_gblup_prediction_all <- rep(list(list()), cycles)
H1_3_gfblup_variances_all <- rep(list(list()), cycles)
H1_3_gfblup_prediction_all <- rep(list(list()), cycles)
H1_3_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # folds_index is not used below (folds come from gblup_validate); the seeding
  # and draw are kept so the RNG state is the same as before this cleanup.
  a <- as.numeric(Sys.time())
  set.seed(a)
  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df)))
  # NOTE(review): unlist() is column-major while byrow = TRUE refills row-wise;
  # confirm this reproduces the fold columns as they were saved.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("H1_3...")
  y <- 1000000 * pheno_df$H1_3
  # NOTE(review): `1*y` puts the response on the right-hand side; presumably an
  # intercept-only design (y ~ 1) is intended - confirm before changing.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The fits are returned from tryCatch and stored here: assignments made inside
  # an error handler only change handler-local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    H1_3_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    H1_3_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  n_sets <- length(GF_filtered)
  gf_var <- setNames(rep(list(list()), n_sets), names(GF_filtered))
  gf_pred <- setNames(rep(list(list()), n_sets), names(GF_filtered))
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Both fits for a set are kept or dropped together, so a set never ends up
    # with variance components but no prediction (or the reverse).
    gf_fit <- tryCatch(
      {
        grm_pair <- c(GF_filtered[k], rGF_filtered[k])
        list(
          var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
          pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
        )
      },
      error = function(e) {
        message("GFBLUP failed in cycle ", r, ", GO set ", k, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  H1_3_gfblup_variances_all[[r]] <- gf_var
  H1_3_gfblup_prediction_all[[r]] <- gf_pred
  H1_3_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(H1_3_gblup_variances_all, "H1_3_gblup_variances_all_3001_4000.rds")
saveRDS(H1_3_gblup_prediction_all, "H1_3_gblup_prediction_all_3001_4000.rds")
saveRDS(H1_3_gfblup_variances_all, "H1_3_gfblup_variances_all_3001_4000.rds")
saveRDS(H1_3_gfblup_prediction_all, "H1_3_gfblup_prediction_all_3001_4000.rds")
saveRDS(H1_3_gfblup_validate_all, "H1_3_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_4001_5000.R
# new file mode 100644; index 0000000000000000000000000000000000000000..7927c0484c95d71f44468ffc73dc1a082b0a4f09
# --- /dev/null ; +++ b/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_4001_5000.R ; @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negation of %in%, built with purrr::negate.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

# All relative output paths below resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# Disabled one-off dependency installation:
#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
# Genotype / phenotype inputs shared by all GO-slice scripts.
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# GO-term priors.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
# load() restores objects by their saved names; the code below uses G from this file.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep GO terms with at least one marker, then take this script's slice (4001-5000).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation folds read from the GBLUP output directory; indexed by cycle below.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for that cycle fails.
H1_3_gblup_variances_all <- rep(list(list()), cycles)
H1_3_gblup_prediction_all <- rep(list(list()), cycles)
H1_3_gfblup_variances_all <- rep(list(list()), cycles)
H1_3_gfblup_prediction_all <- rep(list(list()), cycles)
H1_3_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # folds_index is not used below (folds come from gblup_validate); the seeding
  # and draw are kept so the RNG state is the same as before this cleanup.
  a <- as.numeric(Sys.time())
  set.seed(a)
  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df)))
  # NOTE(review): unlist() is column-major while byrow = TRUE refills row-wise;
  # confirm this reproduces the fold columns as they were saved.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("H1_3...")
  y <- 1000000 * pheno_df$H1_3
  # NOTE(review): `1*y` puts the response on the right-hand side; presumably an
  # intercept-only design (y ~ 1) is intended - confirm before changing.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The fits are returned from tryCatch and stored here: assignments made inside
  # an error handler only change handler-local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    H1_3_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    H1_3_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  n_sets <- length(GF_filtered)
  gf_var <- setNames(rep(list(list()), n_sets), names(GF_filtered))
  gf_pred <- setNames(rep(list(list()), n_sets), names(GF_filtered))
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Both fits for a set are kept or dropped together, so a set never ends up
    # with variance components but no prediction (or the reverse).
    gf_fit <- tryCatch(
      {
        grm_pair <- c(GF_filtered[k], rGF_filtered[k])
        list(
          var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
          pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
        )
      },
      error = function(e) {
        message("GFBLUP failed in cycle ", r, ", GO set ", k, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  H1_3_gfblup_variances_all[[r]] <- gf_var
  H1_3_gfblup_prediction_all[[r]] <- gf_pred
  H1_3_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(H1_3_gblup_variances_all, "H1_3_gblup_variances_all_4001_5000.rds")
saveRDS(H1_3_gblup_prediction_all, "H1_3_gblup_prediction_all_4001_5000.rds")
saveRDS(H1_3_gfblup_variances_all, "H1_3_gfblup_variances_all_4001_5000.rds")
saveRDS(H1_3_gfblup_prediction_all, "H1_3_gfblup_prediction_all_4001_5000.rds")
saveRDS(H1_3_gfblup_validate_all, "H1_3_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_5001_6000.R
# new file mode 100644; index 0000000000000000000000000000000000000000..639d45a6d9b44cae8223eb744ec5267063b4a09d
# --- /dev/null ; +++ b/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_5001_6000.R ; @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negation of %in%, built with purrr::negate.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# Disabled one-off dependency installation:
#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
# load() restores objects by their saved names; the code below uses G from this file.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep GO terms with at least one marker, then take this script's slice (5001-6000).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation folds read from the GBLUP output directory; indexed by cycle below.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for that cycle fails.
H1_3_gblup_variances_all <- rep(list(list()), cycles)
H1_3_gblup_prediction_all <- rep(list(list()), cycles)
H1_3_gfblup_variances_all <- rep(list(list()), cycles)
H1_3_gfblup_prediction_all <- rep(list(list()), cycles)
H1_3_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # folds_index is not used below (folds come from gblup_validate); the seeding
  # and draw are kept so the RNG state is the same as before this cleanup.
  a <- as.numeric(Sys.time())
  set.seed(a)
  folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df)))
  # NOTE(review): unlist() is column-major while byrow = TRUE refills row-wise;
  # confirm this reproduces the fold columns as they were saved.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("H1_3...")
  y <- 1000000 * pheno_df$H1_3
  # NOTE(review): `1*y` puts the response on the right-hand side; presumably an
  # intercept-only design (y ~ 1) is intended - confirm before changing.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The fits are returned from tryCatch and stored here: assignments made inside
  # an error handler only change handler-local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    H1_3_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    H1_3_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  n_sets <- length(GF_filtered)
  gf_var <- setNames(rep(list(list()), n_sets), names(GF_filtered))
  gf_pred <- setNames(rep(list(list()), n_sets), names(GF_filtered))
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Both fits for a set are kept or dropped together, so a set never ends up
    # with variance components but no prediction (or the reverse).
    gf_fit <- tryCatch(
      {
        grm_pair <- c(GF_filtered[k], rGF_filtered[k])
        list(
          var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
          pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
        )
      },
      error = function(e) {
        message("GFBLUP failed in cycle ", r, ", GO set ", k, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  H1_3_gfblup_variances_all[[r]] <- gf_var
  H1_3_gfblup_prediction_all[[r]] <- gf_pred
  H1_3_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(H1_3_gblup_variances_all, "H1_3_gblup_variances_all_5001_6000.rds")
saveRDS(H1_3_gblup_prediction_all, "H1_3_gblup_prediction_all_5001_6000.rds")
saveRDS(H1_3_gfblup_variances_all, "H1_3_gfblup_variances_all_5001_6000.rds")
saveRDS(H1_3_gfblup_prediction_all, "H1_3_gfblup_prediction_all_5001_6000.rds")
saveRDS(H1_3_gfblup_validate_all, "H1_3_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_6001_7297.R
# new file mode 100644; index 0000000000000000000000000000000000000000..4903f5460401ca4d8fc9746435f15be77159a6f0
# --- /dev/null ; +++ b/code/using_GO/psii/H1_3_gblup_vs_gfblup_using_go_model_6001_7297.R ; @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Negation of %in%, built with purrr::negate.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# Disabled one-off dependency installation:
#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
+library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms 
with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[6001:n] +rGF_filtered<-rGF_filtered[6001:n] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H1_3_gblup_variances_all=rep(list(list()),cycles) +H1_3_gblup_prediction_all=rep(list(list()),cycles) +H1_3_gfblup_variances_all=rep(list(list()),cycles) +H1_3_gfblup_prediction_all=rep(list(list()),cycles) +H1_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H1_3...") + y=1000000*pheno_df$H1_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H1_3_gblup_variances_all[[r]]<-var + H1_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H1_3_gblup_variances_all[[r]]<-list() + H1_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H1_3_gfblup_variances_all[[r]]<-var + H1_3_gfblup_prediction_all[[r]]<-pred + H1_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H1_3_gblup_variances_all,"H1_3_gblup_variances_all_6001_7297.rds") +saveRDS(H1_3_gblup_prediction_all,"H1_3_gblup_prediction_all_6001_7297.rds") +saveRDS(H1_3_gfblup_variances_all,"H1_3_gfblup_variances_all_6001_7297.rds") +saveRDS(H1_3_gfblup_prediction_all,"H1_3_gfblup_prediction_all_6001_7297.rds") +saveRDS(H1_3_gfblup_validate_all,"H1_3_gfblup_validate_all_6001_7297.rds") + 
+################################################################################################################################## diff --git a/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..225186e7ec613ded7632616c74e6e56cebfee307 --- /dev/null +++ b/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_1001_2000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") 
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H2_1_gblup_variances_all=rep(list(list()),cycles) +H2_1_gblup_prediction_all=rep(list(list()),cycles) +H2_1_gfblup_variances_all=rep(list(list()),cycles) +H2_1_gfblup_prediction_all=rep(list(list()),cycles) +H2_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H2_1...") + y=1000000*pheno_df$H2_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H2_1_gblup_variances_all[[r]]<-var + H2_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
H2_1_gblup_variances_all[[r]]<-list() + H2_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H2_1_gfblup_variances_all[[r]]<-var + H2_1_gfblup_prediction_all[[r]]<-pred + H2_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H2_1_gblup_variances_all,"H2_1_gblup_variances_all_1001_2000.rds") +saveRDS(H2_1_gblup_prediction_all,"H2_1_gblup_prediction_all_1001_2000.rds") +saveRDS(H2_1_gfblup_variances_all,"H2_1_gfblup_variances_all_1001_2000.rds") +saveRDS(H2_1_gfblup_prediction_all,"H2_1_gfblup_prediction_all_1001_2000.rds") +saveRDS(H2_1_gfblup_validate_all,"H2_1_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..90b9831b41b77a469566d25f9b3c7b7940ec23fe --- /dev/null +++ b/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_1_1000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- 
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 
+ +H2_1_gblup_variances_all=rep(list(list()),cycles) +H2_1_gblup_prediction_all=rep(list(list()),cycles) +H2_1_gfblup_variances_all=rep(list(list()),cycles) +H2_1_gfblup_prediction_all=rep(list(list()),cycles) +H2_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H2_1...") + y=1000000*pheno_df$H2_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H2_1_gblup_variances_all[[r]]<-var + H2_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H2_1_gblup_variances_all[[r]]<-list() + H2_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + 
}) #try catch ends + } + H2_1_gfblup_variances_all[[r]]<-var + H2_1_gfblup_prediction_all[[r]]<-pred + H2_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H2_1_gblup_variances_all,"H2_1_gblup_variances_all_1_1000.rds") +saveRDS(H2_1_gblup_prediction_all,"H2_1_gblup_prediction_all_1_1000.rds") +saveRDS(H2_1_gfblup_variances_all,"H2_1_gfblup_variances_all_1_1000.rds") +saveRDS(H2_1_gfblup_prediction_all,"H2_1_gfblup_prediction_all_1_1000.rds") +saveRDS(H2_1_gfblup_validate_all,"H2_1_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..f12c39d5b94e06d22eb1ca0b924f1f6f868349d2 --- /dev/null +++ b/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_2001_3000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) 
+#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers 
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H2_1_gblup_variances_all=rep(list(list()),cycles) +H2_1_gblup_prediction_all=rep(list(list()),cycles) +H2_1_gfblup_variances_all=rep(list(list()),cycles) +H2_1_gfblup_prediction_all=rep(list(list()),cycles) +H2_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H2_1...") + y=1000000*pheno_df$H2_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H2_1_gblup_variances_all[[r]]<-var + H2_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H2_1_gblup_variances_all[[r]]<-list() + H2_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H2_1_gfblup_variances_all[[r]]<-var + H2_1_gfblup_prediction_all[[r]]<-pred + H2_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H2_1_gblup_variances_all,"H2_1_gblup_variances_all_2001_3000.rds") +saveRDS(H2_1_gblup_prediction_all,"H2_1_gblup_prediction_all_2001_3000.rds") +saveRDS(H2_1_gfblup_variances_all,"H2_1_gfblup_variances_all_2001_3000.rds") +saveRDS(H2_1_gfblup_prediction_all,"H2_1_gfblup_prediction_all_2001_3000.rds") +saveRDS(H2_1_gfblup_validate_all,"H2_1_gfblup_validate_all_2001_3000.rds") + 
+################################################################################################################################## diff --git a/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..4034e9118634a2b32b51946d72211842c1465fb7 --- /dev/null +++ b/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_3001_4000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") 
# ---- Restrict the GO feature sets to this job's slice ----
# Keep only GO terms with at least one marker, then take positions 3001-4000.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]

# ---- Cross-validation setup ----
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets saved under the GBLUP output directory, one entry per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) result slot per cycle; a slot stays empty when the
# corresponding fit fails.
H2_1_gblup_variances_all <- rep(list(list()), cycles)
H2_1_gblup_prediction_all <- rep(list(list()), cycles)
H2_1_gfblup_variances_all <- rep(list(list()), cycles)
H2_1_gfblup_prediction_all <- rep(list(list()), cycles)
H2_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The folds are taken from the stored validation sets. The former time-based
  # set.seed()/sample() fold draw and the empty fill loop were dead code and
  # have been removed.
  # NOTE(review): unlist() flattens a matrix column-wise while byrow = TRUE
  # refills row-wise, so if gblup_validate[[r]] holds a folds-in-columns matrix
  # the fold membership is regrouped here -- confirm this is intended.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message("H2_1...")
  y <- 1000000 * pheno_df$H2_1
  # NOTE(review): the response also appears on the right-hand side; this looks
  # like it was meant to be an intercept-only design (y ~ 1) -- confirm.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)

  # ---- GBLUP: single genomic relationship matrix ----
  # Both fits must succeed for the cycle's results to be stored.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      # Report instead of swallowing silently; the empty slot is kept.
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    H2_1_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    H2_1_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one fit per GO term (feature GRM + remainder GRM) ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
    }, error = function(e) {
      # Slots were pre-filled with list(), so a failure only needs reporting.
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  H2_1_gfblup_variances_all[[r]] <- gf_var
  H2_1_gfblup_prediction_all[[r]] <- gf_pred
  H2_1_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Persist results (relative to the working directory) ----
saveRDS(H2_1_gblup_variances_all, "H2_1_gblup_variances_all_3001_4000.rds")
saveRDS(H2_1_gblup_prediction_all, "H2_1_gblup_prediction_all_3001_4000.rds")
saveRDS(H2_1_gfblup_variances_all, "H2_1_gfblup_variances_all_3001_4000.rds")
saveRDS(H2_1_gfblup_prediction_all, "H2_1_gfblup_prediction_all_3001_4000.rds")
saveRDS(H2_1_gfblup_validate_all, "H2_1_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
# ---- Next file in the patch (diff header kept verbatim as comments) ----
# diff --git a/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..d3d652c9398684865a514ec4f3c837a44def5218
# --- /dev/null
# +++ b/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_4001_5000.R
# @@ -0,0 +1,130 @@

# ---- Helper functions ----
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) {
  gsub(pattern = "^\\s+|\\s+$", replacement = "", x = x)
}
# Negated %in%: TRUE where the left-hand value is absent from the table.
`%not_in%` <- purrr::negate(`%in%`)
# Report whether a package name appears among the installed packages.
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}

# ---- Working directory (the relative saveRDS() outputs land here) ----
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# One-time setup, left disabled as in the original script:
#   if (!is.installed("backports")) install.packages("backports")
#   if (!is.installed("devtools")) install.packages("devtools")
#   library("devtools")
#   if (!is.installed("qgg")) {
#     options(devtools.install.args=" --no-multiargs")
#     devtools::install_github("psoerensen/qgg")
#   }

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library(qgg)
library(dplyr)

# ---- Input data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
# GO-term objects; the original notes record 7432 entries for these two.
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# GO-derived objects; original note: 7297 entries, excluding those GO terms
# with zero markers.
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
# load() restores objects under their saved names; presumably this provides
# the matrix G used by the GBLUP fit below -- verify against the file.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")

# ---- Restrict the GO feature sets to this job's slice ----
# Keep only GO terms with at least one marker, then take positions 4001-5000.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]

# ---- Cross-validation setup ----
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets saved under the GBLUP output directory, one entry per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) result slot per cycle; a slot stays empty when the
# corresponding fit fails.
H2_1_gblup_variances_all <- rep(list(list()), cycles)
H2_1_gblup_prediction_all <- rep(list(list()), cycles)
H2_1_gfblup_variances_all <- rep(list(list()), cycles)
H2_1_gfblup_prediction_all <- rep(list(list()), cycles)
H2_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The folds are taken from the stored validation sets. The former time-based
  # set.seed()/sample() fold draw and the empty fill loop were dead code and
  # have been removed.
  # NOTE(review): unlist() flattens a matrix column-wise while byrow = TRUE
  # refills row-wise, so if gblup_validate[[r]] holds a folds-in-columns matrix
  # the fold membership is regrouped here -- confirm this is intended.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message("H2_1...")
  y <- 1000000 * pheno_df$H2_1
  # NOTE(review): the response also appears on the right-hand side; this looks
  # like it was meant to be an intercept-only design (y ~ 1) -- confirm.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)

  # ---- GBLUP: single genomic relationship matrix ----
  # Both fits must succeed for the cycle's results to be stored.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      # Report instead of swallowing silently; the empty slot is kept.
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    H2_1_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    H2_1_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one fit per GO term (feature GRM + remainder GRM) ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
    }, error = function(e) {
      # Slots were pre-filled with list(), so a failure only needs reporting.
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  H2_1_gfblup_variances_all[[r]] <- gf_var
  H2_1_gfblup_prediction_all[[r]] <- gf_pred
  H2_1_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Persist results (relative to the working directory) ----
saveRDS(H2_1_gblup_variances_all, "H2_1_gblup_variances_all_4001_5000.rds")
saveRDS(H2_1_gblup_prediction_all, "H2_1_gblup_prediction_all_4001_5000.rds")
saveRDS(H2_1_gfblup_variances_all, "H2_1_gfblup_variances_all_4001_5000.rds")
saveRDS(H2_1_gfblup_prediction_all, "H2_1_gfblup_prediction_all_4001_5000.rds")
saveRDS(H2_1_gfblup_validate_all, "H2_1_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################
# ---- Next file in the patch (diff header kept verbatim as comments) ----
# diff --git a/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_5001_6000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..c8cac39346b93a526ccf62d24d85a825434d309a
# --- /dev/null
# +++ b/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_5001_6000.R
# @@ -0,0 +1,130 @@

# ---- Helper functions ----
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) {
  gsub(pattern = "^\\s+|\\s+$", replacement = "", x = x)
}
# Negated %in%: TRUE where the left-hand value is absent from the table.
`%not_in%` <- purrr::negate(`%in%`)
# Report whether a package name appears among the installed packages.
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}

# ---- Working directory ----
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# One-time setup, left disabled as in the original script:
#   if (!is.installed("backports")) install.packages("backports")
#   if (!is.installed("devtools")) install.packages("devtools")
#   library("devtools")
#   if (!is.installed("qgg")) {
#     options(devtools.install.args=" --no-multiargs")
#     devtools::install_github("psoerensen/qgg")
#   }

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library(qgg)
library(dplyr)

# ---- Input data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# GO-term objects; the original notes record 7432 entries for these three.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# GO-derived objects; original note: 7297 entries, excluding those GO terms
# with zero markers.
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
# load() restores objects under their saved names; presumably this provides
# the matrix G used by the GBLUP fit below -- verify against the file.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")

# ---- Restrict the GO feature sets to this job's slice ----
# Keep only GO terms with at least one marker, then take positions 5001-6000.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]

# ---- Cross-validation setup ----
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets saved under the GBLUP output directory, one entry per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) result slot per cycle; a slot stays empty when the
# corresponding fit fails.
H2_1_gblup_variances_all <- rep(list(list()), cycles)
H2_1_gblup_prediction_all <- rep(list(list()), cycles)
H2_1_gfblup_variances_all <- rep(list(list()), cycles)
H2_1_gfblup_prediction_all <- rep(list(list()), cycles)
H2_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The folds are taken from the stored validation sets. The former time-based
  # set.seed()/sample() fold draw and the empty fill loop were dead code and
  # have been removed.
  # NOTE(review): unlist() flattens a matrix column-wise while byrow = TRUE
  # refills row-wise, so if gblup_validate[[r]] holds a folds-in-columns matrix
  # the fold membership is regrouped here -- confirm this is intended.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message("H2_1...")
  y <- 1000000 * pheno_df$H2_1
  # NOTE(review): the response also appears on the right-hand side; this looks
  # like it was meant to be an intercept-only design (y ~ 1) -- confirm.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)

  # ---- GBLUP: single genomic relationship matrix ----
  # Both fits must succeed for the cycle's results to be stored.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      # Report instead of swallowing silently; the empty slot is kept.
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    H2_1_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    H2_1_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one fit per GO term (feature GRM + remainder GRM) ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
    }, error = function(e) {
      # Slots were pre-filled with list(), so a failure only needs reporting.
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  H2_1_gfblup_variances_all[[r]] <- gf_var
  H2_1_gfblup_prediction_all[[r]] <- gf_pred
  H2_1_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Persist results (relative to the working directory) ----
saveRDS(H2_1_gblup_variances_all, "H2_1_gblup_variances_all_5001_6000.rds")
saveRDS(H2_1_gblup_prediction_all, "H2_1_gblup_prediction_all_5001_6000.rds")
saveRDS(H2_1_gfblup_variances_all, "H2_1_gfblup_variances_all_5001_6000.rds")
saveRDS(H2_1_gfblup_prediction_all, "H2_1_gfblup_prediction_all_5001_6000.rds")
saveRDS(H2_1_gfblup_validate_all, "H2_1_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################
# ---- Next file in the patch (diff header kept verbatim as comments) ----
# diff --git a/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_6001_7297.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..32fc295e28fe99460ebf9580f29e82a1989e3462
# --- /dev/null
# +++ b/code/using_GO/psii/H2_1_gblup_vs_gfblup_using_go_model_6001_7297.R
# @@ -0,0 +1,130 @@

# ---- Helper functions ----
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) {
  gsub(pattern = "^\\s+|\\s+$", replacement = "", x = x)
}
# Negated %in%: TRUE where the left-hand value is absent from the table.
`%not_in%` <- purrr::negate(`%in%`)
# Report whether a package name appears among the installed packages.
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}

# ---- Working directory ----
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# One-time setup, left disabled as in the original script:
#   if (!is.installed("backports")) install.packages("backports")
#   if (!is.installed("devtools")) install.packages("devtools")
#   library("devtools")
#   if (!is.installed("qgg")) {
#     options(devtools.install.args=" --no-multiargs")
#     devtools::install_github("psoerensen/qgg")
#   }

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library(qgg)
library(dplyr)

# ---- Input data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
# Genotype / phenotype inputs read from the shared priors/data directory.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# GO-term objects; the original notes record 7432 entries for these three.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# GO-derived objects; original note: 7297 entries, excluding those GO terms
# with zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# load() restores objects under their saved names; presumably this provides
# the matrix G used by the GBLUP fit further down -- verify against the file.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
# ---- Restrict the GO feature sets to this job's slice ----
# Keep only GO terms with at least one marker, then take positions 6001
# through n (the number of retained terms).
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# NOTE(review): 6001:n counts downwards if n < 6001 -- presumably n is 7297
# here, as the output file names suggest; confirm.
GF_filtered <- GF_filtered[6001:n]
rGF_filtered <- rGF_filtered[6001:n]

# ---- Cross-validation setup ----
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets saved under the GBLUP output directory, one entry per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) result slot per cycle; a slot stays empty when the
# corresponding fit fails.
H2_1_gblup_variances_all <- rep(list(list()), cycles)
H2_1_gblup_prediction_all <- rep(list(list()), cycles)
H2_1_gfblup_variances_all <- rep(list(list()), cycles)
H2_1_gfblup_prediction_all <- rep(list(list()), cycles)
H2_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The folds are taken from the stored validation sets. The former time-based
  # set.seed()/sample() fold draw and the empty fill loop were dead code and
  # have been removed.
  # NOTE(review): unlist() flattens a matrix column-wise while byrow = TRUE
  # refills row-wise, so if gblup_validate[[r]] holds a folds-in-columns matrix
  # the fold membership is regrouped here -- confirm this is intended.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message("H2_1...")
  y <- 1000000 * pheno_df$H2_1
  # NOTE(review): the response also appears on the right-hand side; this looks
  # like it was meant to be an intercept-only design (y ~ 1) -- confirm.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)

  # ---- GBLUP: single genomic relationship matrix ----
  # Both fits must succeed for the cycle's results to be stored.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      # Report instead of swallowing silently; the empty slot is kept.
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    H2_1_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    H2_1_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one fit per GO term (feature GRM + remainder GRM) ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
    }, error = function(e) {
      # Slots were pre-filled with list(), so a failure only needs reporting.
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  H2_1_gfblup_variances_all[[r]] <- gf_var
  H2_1_gfblup_prediction_all[[r]] <- gf_pred
  H2_1_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Persist results (relative to the working directory) ----
saveRDS(H2_1_gblup_variances_all, "H2_1_gblup_variances_all_6001_7297.rds")
saveRDS(H2_1_gblup_prediction_all, "H2_1_gblup_prediction_all_6001_7297.rds")
saveRDS(H2_1_gfblup_variances_all, "H2_1_gfblup_variances_all_6001_7297.rds")
saveRDS(H2_1_gfblup_prediction_all, "H2_1_gfblup_prediction_all_6001_7297.rds")
saveRDS(H2_1_gfblup_validate_all, "H2_1_gfblup_validate_all_6001_7297.rds")

##################################################################################################################################
# ---- Next file in the patch (diff header kept verbatim as comments) ----
# diff --git a/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_1001_2000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..ad41840b22f96c4bf6940453a27dd7581c057551
# --- /dev/null
# +++ b/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_1001_2000.R
# @@ -0,0 +1,130 @@

# ---- Helper functions ----
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) {
  gsub(pattern = "^\\s+|\\s+$", replacement = "", x = x)
}
# Negated %in%: TRUE where the left-hand value is absent from the table.
`%not_in%` <- purrr::negate(`%in%`)
# Report whether a package name appears among the installed packages.
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}

# ---- Working directory (the relative saveRDS() outputs land here) ----
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# One-time setup, left disabled as in the original script:
#   if (!is.installed("backports")) install.packages("backports")
#   if (!is.installed("devtools")) install.packages("devtools")
#   library("devtools")
#   if (!is.installed("qgg")) {
#     options(devtools.install.args=" --no-multiargs")
#     devtools::install_github("psoerensen/qgg")
#   }

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library(qgg)
library(dplyr)

# ---- Input data ----
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
# GO-term objects; the original notes record 7432 entries for these two.
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# GO-derived objects; original note: 7297 entries, excluding those GO terms
# with zero markers.
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
# load() restores objects under their saved names; presumably this provides
# the matrix G used by the GBLUP fit below -- verify against the file.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")

# ---- Restrict the GO feature sets to this job's slice ----
# Keep only GO terms with at least one marker, then take positions 1001-2000.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[1001:2000]
rGF_filtered <- rGF_filtered[1001:2000]

# ---- Cross-validation setup ----
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets saved under the GBLUP output directory, one entry per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One (initially empty) result slot per cycle; a slot stays empty when the
# corresponding fit fails.
H2_2_gblup_variances_all <- rep(list(list()), cycles)
H2_2_gblup_prediction_all <- rep(list(list()), cycles)
H2_2_gfblup_variances_all <- rep(list(list()), cycles)
H2_2_gfblup_prediction_all <- rep(list(list()), cycles)
H2_2_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The folds are taken from the stored validation sets. The former time-based
  # set.seed()/sample() fold draw and the empty fill loop were dead code and
  # have been removed.
  # NOTE(review): unlist() flattens a matrix column-wise while byrow = TRUE
  # refills row-wise, so if gblup_validate[[r]] holds a folds-in-columns matrix
  # the fold membership is regrouped here -- confirm this is intended.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message("H2_2...")
  y <- 1000000 * pheno_df$H2_2
  # NOTE(review): the response also appears on the right-hand side; this looks
  # like it was meant to be an intercept-only design (y ~ 1) -- confirm.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)

  # ---- GBLUP: single genomic relationship matrix ----
  # Both fits must succeed for the cycle's results to be stored.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      # Report instead of swallowing silently; the empty slot is kept.
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    H2_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    H2_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }

  # ---- GFBLUP: one fit per GO term (feature GRM + remainder GRM) ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
    }, error = function(e) {
      # Slots were pre-filled with list(), so a failure only needs reporting.
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }
  H2_2_gfblup_variances_all[[r]] <- gf_var
  H2_2_gfblup_prediction_all[[r]] <- gf_pred
  H2_2_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Persist results (relative to the working directory) ----
saveRDS(H2_2_gblup_variances_all, "H2_2_gblup_variances_all_1001_2000.rds")
saveRDS(H2_2_gblup_prediction_all, "H2_2_gblup_prediction_all_1001_2000.rds")
saveRDS(H2_2_gfblup_variances_all, "H2_2_gfblup_variances_all_1001_2000.rds")
saveRDS(H2_2_gfblup_prediction_all, "H2_2_gfblup_prediction_all_1001_2000.rds")
saveRDS(H2_2_gfblup_validate_all, "H2_2_gfblup_validate_all_1001_2000.rds")

##################################################################################################################################
# ---- Next file in the patch (diff header kept verbatim as comments) ----
# diff --git a/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_1_1000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..fa04203cb779c1f187643f5ab66552c7b64a75f1
# --- /dev/null
# +++ b/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_1_1000.R
# @@ -0,0 +1,130 @@

# ---- Helper functions ----
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) {
  gsub(pattern = "^\\s+|\\s+$", replacement = "", x = x)
}
# Negated %in%: TRUE where the left-hand value is absent from the table.
`%not_in%` <- purrr::negate(`%in%`)
# Report whether a package name appears among the installed packages.
is.installed <- function(mypkg) {
  mypkg %in% installed.packages()[, 1]
}

# ---- Working directory ----
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# One-time setup, left disabled as in the original script:
#   if (!is.installed("backports")) install.packages("backports")
#   if (!is.installed("devtools")) install.packages("devtools")
#   library("devtools")
#   if (!is.installed("qgg")) {
#     options(devtools.install.args=" --no-multiargs")
#     devtools::install_github("psoerensen/qgg")
#   }

# ---- Packages ----
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Load data ----
message("Loading data...")
# Directory prefixes shared by every input file below.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# 7432
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
# 7297, excluding those GO terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
# Expected to provide `G`, which the GBLUP fit below uses -- TODO confirm.
load(file = file.path(data_dir, "G.Rdata"))

# ---- Keep only GO terms with at least one marker ----
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# This script processes one slice of the filtered GO terms.
GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]

# ---- Run configuration ----
traits <- ncol(pheno_df)
cycles <- 10
# Fold assignments saved by an earlier GBLUP run; reused instead of resampling.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
H2_2_gblup_variances_all <- rep(list(list()), cycles)
H2_2_gblup_prediction_all <- rep(list(list()), cycles)
H2_2_gfblup_variances_all <- rep(list(list()), cycles)
H2_2_gfblup_prediction_all <- rep(list(list()), cycles)
H2_2_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Fold matrix for this cycle, rebuilt from the saved GBLUP folds; both models
  # below are validated against it. The former per-cycle set.seed()/sample()
  # draw and the empty fold-filling loop were never used and are dropped.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message("H2_2...")
  y <- 1000000 * pheno_df$H2_2
  # NOTE(review): the right-hand side refers to y itself; confirm that an
  # intercept-only design (y ~ 1) is what was intended.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)

  # ---- GBLUP: GRM = G only ----
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H2_2_gblup_variances_all[[r]] <- gblup_var
    H2_2_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    # Assignments inside a handler are local to it, so the slots just keep
    # their preallocated empty lists; report the failure instead of hiding it.
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP: one fit per GO term, GRM = k-th entries of GF and rGF ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  # seq_along() yields no iterations for an empty slice (1:0 would yield two).
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch({
      var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
    }, error = function(e) {
      # Entry k keeps its preallocated empty list.
      message("GFBLUP failed in cycle ", r, " for GO# ", k, ": ", conditionMessage(e))
    })
  }
  H2_2_gfblup_variances_all[[r]] <- var
  H2_2_gfblup_prediction_all[[r]] <- pred
  H2_2_gfblup_validate_all[[r]] <- list(validate)
}

# Written relative to the working directory set earlier in the script.
saveRDS(H2_2_gblup_variances_all, "H2_2_gblup_variances_all_1_1000.rds")
saveRDS(H2_2_gblup_prediction_all, "H2_2_gblup_prediction_all_1_1000.rds")
saveRDS(H2_2_gfblup_variances_all, "H2_2_gfblup_variances_all_1_1000.rds")
saveRDS(H2_2_gfblup_prediction_all, "H2_2_gfblup_prediction_all_1_1000.rds")
saveRDS(H2_2_gfblup_validate_all, "H2_2_gfblup_validate_all_1_1000.rds")

################################################################################
diff --git a/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_2001_3000.R
new file mode 100644
index 0000000000000000000000000000000000000000..88a25b4df318cb880d44e1838f61cec5e91fbe70
--- /dev/null
+++ b/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_2001_3000.R
@@ -0,0 +1,130 @@
# ---- Helper functions ----
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed.
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

# Relative output paths used later in the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# Optional one-time setup (kept disabled):
# if (!is.installed("backports")) install.packages("backports")
# if (!is.installed("devtools")) install.packages("devtools")
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }

message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Load data ----
message("Loading data...")
# Directory prefixes shared by every input file below.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# 7432
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
# 7297, excluding those GO terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
# Expected to provide `G` for the GBLUP fit -- TODO confirm.
load(file = file.path(data_dir, "G.Rdata"))
# ---- Keep only GO terms with at least one marker ----
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# This script processes one slice of the filtered GO terms.
GF_filtered <- GF_filtered[2001:3000]
rGF_filtered <- rGF_filtered[2001:3000]

# ---- Run configuration ----
traits <- ncol(pheno_df)
cycles <- 10
# Fold assignments saved by an earlier GBLUP run; reused instead of resampling.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
H2_2_gblup_variances_all <- rep(list(list()), cycles)
H2_2_gblup_prediction_all <- rep(list(list()), cycles)
H2_2_gfblup_variances_all <- rep(list(list()), cycles)
H2_2_gfblup_prediction_all <- rep(list(list()), cycles)
H2_2_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Fold matrix for this cycle, rebuilt from the saved GBLUP folds; both models
  # below are validated against it. The former per-cycle set.seed()/sample()
  # draw and the empty fold-filling loop were never used and are dropped.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message("H2_2...")
  y <- 1000000 * pheno_df$H2_2
  # NOTE(review): the right-hand side refers to y itself; confirm that an
  # intercept-only design (y ~ 1) is what was intended.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)

  # ---- GBLUP: GRM = G only ----
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H2_2_gblup_variances_all[[r]] <- gblup_var
    H2_2_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    # Assignments inside a handler are local to it, so the slots just keep
    # their preallocated empty lists; report the failure instead of hiding it.
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP: one fit per GO term, GRM = k-th entries of GF and rGF ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  # seq_along() yields no iterations for an empty slice (1:0 would yield two).
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch({
      var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
    }, error = function(e) {
      # Entry k keeps its preallocated empty list.
      message("GFBLUP failed in cycle ", r, " for GO# ", k, ": ", conditionMessage(e))
    })
  }
  H2_2_gfblup_variances_all[[r]] <- var
  H2_2_gfblup_prediction_all[[r]] <- pred
  H2_2_gfblup_validate_all[[r]] <- list(validate)
}

# Written relative to the working directory set earlier in the script.
saveRDS(H2_2_gblup_variances_all, "H2_2_gblup_variances_all_2001_3000.rds")
saveRDS(H2_2_gblup_prediction_all, "H2_2_gblup_prediction_all_2001_3000.rds")
saveRDS(H2_2_gfblup_variances_all, "H2_2_gfblup_variances_all_2001_3000.rds")
saveRDS(H2_2_gfblup_prediction_all, "H2_2_gfblup_prediction_all_2001_3000.rds")
saveRDS(H2_2_gfblup_validate_all, "H2_2_gfblup_validate_all_2001_3000.rds")

################################################################################
diff --git a/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_3001_4000.R
new file mode 100644
index 0000000000000000000000000000000000000000..340b027e0ef5e78ce8a15460e983b277036fcfe3
--- /dev/null
+++ b/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_3001_4000.R
@@ -0,0 +1,130 @@
# ---- Helper functions ----
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed.
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

# Relative output paths used later in the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# Optional one-time setup (kept disabled):
# if (!is.installed("backports")) install.packages("backports")
# if (!is.installed("devtools")) install.packages("devtools")
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }

message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Load data ----
message("Loading data...")
# Directory prefixes shared by the input files below.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# 7432
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
# Directory prefixes shared by the input files below.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

# 7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
# 7297, excluding those GO terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
# Expected to provide `G`, which the GBLUP fit below uses -- TODO confirm.
load(file = file.path(data_dir, "G.Rdata"))

# ---- Keep only GO terms with at least one marker ----
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# This script processes one slice of the filtered GO terms.
GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]

# ---- Run configuration ----
traits <- ncol(pheno_df)
cycles <- 10
# Fold assignments saved by an earlier GBLUP run; reused instead of resampling.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
H2_2_gblup_variances_all <- rep(list(list()), cycles)
H2_2_gblup_prediction_all <- rep(list(list()), cycles)
H2_2_gfblup_variances_all <- rep(list(list()), cycles)
H2_2_gfblup_prediction_all <- rep(list(list()), cycles)
H2_2_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Fold matrix for this cycle, rebuilt from the saved GBLUP folds; both models
  # below are validated against it. The former per-cycle set.seed()/sample()
  # draw and the empty fold-filling loop were never used and are dropped.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message("H2_2...")
  y <- 1000000 * pheno_df$H2_2
  # NOTE(review): the right-hand side refers to y itself; confirm that an
  # intercept-only design (y ~ 1) is what was intended.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)

  # ---- GBLUP: GRM = G only ----
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H2_2_gblup_variances_all[[r]] <- gblup_var
    H2_2_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    # Assignments inside a handler are local to it, so the slots just keep
    # their preallocated empty lists; report the failure instead of hiding it.
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP: one fit per GO term, GRM = k-th entries of GF and rGF ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  # seq_along() yields no iterations for an empty slice (1:0 would yield two).
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch({
      var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
    }, error = function(e) {
      # Entry k keeps its preallocated empty list.
      message("GFBLUP failed in cycle ", r, " for GO# ", k, ": ", conditionMessage(e))
    })
  }
  H2_2_gfblup_variances_all[[r]] <- var
  H2_2_gfblup_prediction_all[[r]] <- pred
  H2_2_gfblup_validate_all[[r]] <- list(validate)
}

# Written relative to the working directory set earlier in the script.
saveRDS(H2_2_gblup_variances_all, "H2_2_gblup_variances_all_3001_4000.rds")
saveRDS(H2_2_gblup_prediction_all, "H2_2_gblup_prediction_all_3001_4000.rds")
saveRDS(H2_2_gfblup_variances_all, "H2_2_gfblup_variances_all_3001_4000.rds")
saveRDS(H2_2_gfblup_prediction_all, "H2_2_gfblup_prediction_all_3001_4000.rds")
saveRDS(H2_2_gfblup_validate_all, "H2_2_gfblup_validate_all_3001_4000.rds")

################################################################################
diff --git a/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_4001_5000.R
new file mode 100644
index 0000000000000000000000000000000000000000..e4d3965e73b5789c4b61eaef581550a244e5037a
--- /dev/null
+++ b/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_4001_5000.R
@@ -0,0 +1,130 @@
# ---- Helper functions ----
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed.
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

# Relative output paths used later in the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# Optional one-time setup (kept disabled):
# if (!is.installed("backports")) install.packages("backports")
# if (!is.installed("devtools")) install.packages("devtools")
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }

message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Load data ----
message("Loading data...")
# Directory prefixes shared by every input file below.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# 7432
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
# 7297, excluding those GO terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
# Expected to provide `G`, which the GBLUP fit below uses -- TODO confirm.
load(file = file.path(data_dir, "G.Rdata"))

# ---- Keep only GO terms with at least one marker ----
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# This script processes one slice of the filtered GO terms.
GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]

# ---- Run configuration ----
traits <- ncol(pheno_df)
cycles <- 10
# Fold assignments saved by an earlier GBLUP run; reused instead of resampling.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
H2_2_gblup_variances_all <- rep(list(list()), cycles)
H2_2_gblup_prediction_all <- rep(list(list()), cycles)
H2_2_gfblup_variances_all <- rep(list(list()), cycles)
H2_2_gfblup_prediction_all <- rep(list(list()), cycles)
H2_2_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Fold matrix for this cycle, rebuilt from the saved GBLUP folds; both models
  # below are validated against it. The former per-cycle set.seed()/sample()
  # draw and the empty fold-filling loop were never used and are dropped.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message("H2_2...")
  y <- 1000000 * pheno_df$H2_2
  # NOTE(review): the right-hand side refers to y itself; confirm that an
  # intercept-only design (y ~ 1) is what was intended.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)

  # ---- GBLUP: GRM = G only ----
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H2_2_gblup_variances_all[[r]] <- gblup_var
    H2_2_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    # Assignments inside a handler are local to it, so the slots just keep
    # their preallocated empty lists; report the failure instead of hiding it.
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP: one fit per GO term, GRM = k-th entries of GF and rGF ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  # seq_along() yields no iterations for an empty slice (1:0 would yield two).
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch({
      var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
    }, error = function(e) {
      # Entry k keeps its preallocated empty list.
      message("GFBLUP failed in cycle ", r, " for GO# ", k, ": ", conditionMessage(e))
    })
  }
  H2_2_gfblup_variances_all[[r]] <- var
  H2_2_gfblup_prediction_all[[r]] <- pred
  H2_2_gfblup_validate_all[[r]] <- list(validate)
}

# Written relative to the working directory set earlier in the script.
saveRDS(H2_2_gblup_variances_all, "H2_2_gblup_variances_all_4001_5000.rds")
saveRDS(H2_2_gblup_prediction_all, "H2_2_gblup_prediction_all_4001_5000.rds")
saveRDS(H2_2_gfblup_variances_all, "H2_2_gfblup_variances_all_4001_5000.rds")
saveRDS(H2_2_gfblup_prediction_all, "H2_2_gfblup_prediction_all_4001_5000.rds")
saveRDS(H2_2_gfblup_validate_all, "H2_2_gfblup_validate_all_4001_5000.rds")

################################################################################
diff --git a/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_5001_6000.R
new file mode 100644
index 0000000000000000000000000000000000000000..b6983dd3f49609d975523ff6f92c2457a4f11a96
--- /dev/null
+++ b/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_5001_6000.R
@@ -0,0 +1,130 @@
# ---- Helper functions ----
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed.
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}

# Relative output paths used later in the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# Optional one-time setup (kept disabled):
# if (!is.installed("backports")) install.packages("backports")
# if (!is.installed("devtools")) install.packages("devtools")
# library("devtools")
# if (!is.installed("qgg")) {
#   options(devtools.install.args = " --no-multiargs")
#   devtools::install_github("psoerensen/qgg")
# }

message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Load data ----
message("Loading data...")
# Directory prefixes shared by every input file below.
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
# 7432
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
# 7297, excluding those GO terms with zero markers
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
# Expected to provide `G` for the GBLUP fit -- TODO confirm.
load(file = file.path(data_dir, "G.Rdata"))
# ---- Keep only GO terms with at least one marker ----
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# This script processes one slice of the filtered GO terms.
GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]

# ---- Run configuration ----
traits <- ncol(pheno_df)
cycles <- 10
# Fold assignments saved by an earlier GBLUP run; reused instead of resampling.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit fails.
H2_2_gblup_variances_all <- rep(list(list()), cycles)
H2_2_gblup_prediction_all <- rep(list(list()), cycles)
H2_2_gfblup_variances_all <- rep(list(list()), cycles)
H2_2_gfblup_prediction_all <- rep(list(list()), cycles)
H2_2_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Fold matrix for this cycle, rebuilt from the saved GBLUP folds; both models
  # below are validated against it. The former per-cycle set.seed()/sample()
  # draw and the empty fold-filling loop were never used and are dropped.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message("H2_2...")
  y <- 1000000 * pheno_df$H2_2
  # NOTE(review): the right-hand side refers to y itself; confirm that an
  # intercept-only design (y ~ 1) is what was intended.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)

  # ---- GBLUP: GRM = G only ----
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H2_2_gblup_variances_all[[r]] <- gblup_var
    H2_2_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    # Assignments inside a handler are local to it, so the slots just keep
    # their preallocated empty lists; report the failure instead of hiding it.
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP: one fit per GO term, GRM = k-th entries of GF and rGF ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  # seq_along() yields no iterations for an empty slice (1:0 would yield two).
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    tryCatch({
      var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
    }, error = function(e) {
      # Entry k keeps its preallocated empty list.
      message("GFBLUP failed in cycle ", r, " for GO# ", k, ": ", conditionMessage(e))
    })
  }
  H2_2_gfblup_variances_all[[r]] <- var
  H2_2_gfblup_prediction_all[[r]] <- pred
  H2_2_gfblup_validate_all[[r]] <- list(validate)
}

# Written relative to the working directory set earlier in the script.
saveRDS(H2_2_gblup_variances_all, "H2_2_gblup_variances_all_5001_6000.rds")
saveRDS(H2_2_gblup_prediction_all, "H2_2_gblup_prediction_all_5001_6000.rds")
saveRDS(H2_2_gfblup_variances_all, "H2_2_gfblup_variances_all_5001_6000.rds")
saveRDS(H2_2_gfblup_prediction_all, "H2_2_gfblup_prediction_all_5001_6000.rds")
saveRDS(H2_2_gfblup_validate_all, "H2_2_gfblup_validate_all_5001_6000.rds")

################################################################################
diff --git a/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_6001_7297.R
new file mode 100644
index 0000000000000000000000000000000000000000..97d1f6f4d3fc7ef5b2fd2430268648f03f44f780
--- /dev/null
+++ b/code/using_GO/psii/H2_2_gblup_vs_gfblup_using_go_model_6001_7297.R
@@ -0,0 +1,130 @@
# ---- Helper functions ----
# The statement starting with `trim` continues on the next line of the file.
trim
<- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
# Read the remaining GO-related inputs and bind each one, under the name given
# on the left, in the current environment. The trailing notes reproduce the
# author's own annotations.
invisible(list2env(
  lapply(
    c(
      go_all_markers_number = "go_all_markers_number.rds", # 7432
      go_all_markers = "go_all_markers.rds", # 7432
      # The remaining objects are annotated by the author as
      # "7297 excluding those go terms with zero markers".
      markerSets = "markerSets.rds",
      setsGF = "setsGF.rds",
      rsetsGF = "rsetsGF.rds",
      nsets = "nsets.rds",
      nsnps = "nsnps.rds",
      GF = "GF.rds",
      rGF = "rGF.rds"
    ),
    function(file_name) {
      readRDS(
        file = file.path(
          "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go",
          file_name
        )
      )
    }
  ),
  envir = environment()
))
# G.Rdata is an .Rdata image, so its objects are restored under their saved
# names (presumably including the genomic relationship matrix G -- confirm).
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")

# Keep only the GO terms that have at least one marker, then take this
# script's slice (6001 up to the last retained term) of the GO-specific
# relationship matrices (GF) and their counterparts (rGF).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)][6001:n]
rGF_filtered <- rGF[names(go_all_markers_filtered)][6001:n]

# Cross-validation setup: 10 cycles of 8 folds, reusing stored fold assignments.
traits <- ncol(pheno_df)
cycles <- 10
# gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE);
gblup_validate <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds"
)
n_folds <- 8
+ +H2_2_gblup_variances_all=rep(list(list()),cycles) +H2_2_gblup_prediction_all=rep(list(list()),cycles) +H2_2_gfblup_variances_all=rep(list(list()),cycles) +H2_2_gfblup_prediction_all=rep(list(list()),cycles) +H2_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H2_2...") + y=1000000*pheno_df$H2_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H2_2_gblup_variances_all[[r]]<-var + H2_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H2_2_gblup_variances_all[[r]]<-list() + H2_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + 
}) #try catch ends + } + H2_2_gfblup_variances_all[[r]]<-var + H2_2_gfblup_prediction_all[[r]]<-pred + H2_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H2_2_gblup_variances_all,"H2_2_gblup_variances_all_6001_7297.rds") +saveRDS(H2_2_gblup_prediction_all,"H2_2_gblup_prediction_all_6001_7297.rds") +saveRDS(H2_2_gfblup_variances_all,"H2_2_gfblup_variances_all_6001_7297.rds") +saveRDS(H2_2_gfblup_prediction_all,"H2_2_gfblup_prediction_all_6001_7297.rds") +saveRDS(H2_2_gfblup_validate_all,"H2_2_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..89bddf5f83b949a73c7e9f41790d3895c45002cd --- /dev/null +++ b/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_1001_2000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# Load inputs ------------------------------------------------------------------
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))

# Author's annotation for the three objects below: 7432.
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))

# Author's annotation for the objects below:
# 7297, excluding those GO terms with zero markers.
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))

# G.Rdata is an .Rdata image; it presumably restores G, which is used as the
# GBLUP relationship matrix below -- confirm.
load(file = file.path(data_dir, "G.Rdata"))

# Select the GO terms handled by this script -------------------------------------
# Keep only GO terms with at least one marker, then take terms 1001..2000 of
# the GO-specific matrices (GF) and their counterparts (rGF).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[1001:2000]
rGF_filtered <- rGF_filtered[1001:2000]

# Cross-validation setup -------------------------------------------------------
traits <- ncol(pheno_df)
cycles <- 10
# Fold assignments are reused from the stored GBLUP run instead of being drawn
# again, so no random sampling is needed in this script.
gblup_validate <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds"
)
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
H2_3_gblup_variances_all <- rep(list(list()), cycles)
H2_3_gblup_prediction_all <- rep(list(list()), cycles)
H2_3_gfblup_variances_all <- rep(list(list()), cycles)
H2_3_gfblup_prediction_all <- rep(list(list()), cycles)
H2_3_gfblup_validate_all <- rep(list(list()), cycles)

# The response and design matrix do not depend on the cycle, so build them once.
y <- 1000000 * pheno_df$H2_3
# NOTE(review): the response also appears on the right-hand side of this
# formula; it is kept unchanged to preserve the resulting design matrix, but
# `y ~ 1` looks like the intent -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): byrow = TRUE assumes the stored indices were flattened row by
  # row; confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  message("H2_3...")

  # GBLUP: store the results only when both fits succeed. Failures are reported
  # instead of being swallowed; the slots then keep their empty-list default.
  tryCatch(
    {
      gblup_fit <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      gblup_cv <- greml(
        y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1
      )
      H2_3_gblup_variances_all[[r]] <- gblup_fit
      H2_3_gblup_prediction_all[[r]] <- gblup_cv
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    }
  )

  # GFBLUP: one two-component model per GO term (GO-specific matrix plus its
  # counterpart from rGF).
  go_variances <- rep(list(list()), length(GF_filtered))
  go_predictions <- rep(list(list()), length(GF_filtered))
  names(go_variances) <- names(GF_filtered)
  names(go_predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # If the first fit succeeds and the second fails, the first is still kept.
    tryCatch(
      {
        go_variances[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
        go_predictions[[k]] <- greml(
          y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1
        )
      },
      error = function(e) {
        message(
          "GFBLUP failed in cycle ", r, " for GO# ", k, ": ",
          conditionMessage(e)
        )
      }
    )
  }
  H2_3_gfblup_variances_all[[r]] <- go_variances
  H2_3_gfblup_prediction_all[[r]] <- go_predictions
  H2_3_gfblup_validate_all[[r]] <- list(validate)
}

# Persist results (relative to the current working directory) --------------------
saveRDS(H2_3_gblup_variances_all, "H2_3_gblup_variances_all_1001_2000.rds")
saveRDS(H2_3_gblup_prediction_all, "H2_3_gblup_prediction_all_1001_2000.rds")
saveRDS(H2_3_gfblup_variances_all, "H2_3_gfblup_variances_all_1001_2000.rds")
saveRDS(H2_3_gfblup_prediction_all, "H2_3_gfblup_prediction_all_1001_2000.rds")
saveRDS(H2_3_gfblup_validate_all, "H2_3_gfblup_validate_all_1001_2000.rds")
+################################################################################################################################## diff --git a/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..67a2f50227c3884ecc89178303575d24410a1024 --- /dev/null +++ b/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_1_1000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# Read the serialized input objects and bind each one, under the name given on
# the left, in the current environment. Values are paths relative to the priors
# directory; the trailing notes reproduce the author's own annotations.
invisible(list2env(
  lapply(
    c(
      ath_all_new_maf_ldpruned_map = "data/ath_all_new_maf_ldpruned_map.rds",
      Pheno = "data/Pheno.rds",
      geno_pheno = "data/geno_pheno.rds",
      MAC_df = "data/MAC_df.rds",
      pheno_df = "data/pheno_df.rds",
      W = "data/W.rds",
      go_all_genes_number = "go/go_all_genes_number.rds", # 7432
      go_all_markers_number = "go/go_all_markers_number.rds", # 7432
      go_all_markers = "go/go_all_markers.rds", # 7432
      # The remaining objects are annotated by the author as
      # "7297 excluding those go terms with zero markers".
      markerSets = "go/markerSets.rds",
      setsGF = "go/setsGF.rds",
      rsetsGF = "go/rsetsGF.rds",
      nsets = "go/nsets.rds",
      nsnps = "go/nsnps.rds",
      GF = "go/GF.rds",
      rGF = "go/rGF.rds"
    ),
    function(rel_path) {
      readRDS(
        file = file.path(
          "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors",
          rel_path
        )
      )
    }
  ),
  envir = environment()
))
# G.Rdata is an .Rdata image, so its objects are restored under their saved
# names (presumably including the genomic relationship matrix G -- confirm).
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H2_3_gblup_variances_all=rep(list(list()),cycles) +H2_3_gblup_prediction_all=rep(list(list()),cycles) +H2_3_gfblup_variances_all=rep(list(list()),cycles) +H2_3_gfblup_prediction_all=rep(list(list()),cycles) +H2_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H2_3...") + y=1000000*pheno_df$H2_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H2_3_gblup_variances_all[[r]]<-var + H2_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
H2_3_gblup_variances_all[[r]]<-list() + H2_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H2_3_gfblup_variances_all[[r]]<-var + H2_3_gfblup_prediction_all[[r]]<-pred + H2_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H2_3_gblup_variances_all,"H2_3_gblup_variances_all_1_1000.rds") +saveRDS(H2_3_gblup_prediction_all,"H2_3_gblup_prediction_all_1_1000.rds") +saveRDS(H2_3_gfblup_variances_all,"H2_3_gfblup_variances_all_1_1000.rds") +saveRDS(H2_3_gfblup_prediction_all,"H2_3_gfblup_prediction_all_1_1000.rds") +saveRDS(H2_3_gfblup_validate_all,"H2_3_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..8da00a4fd2f9f7a003cc78d061f9172843a4a865 --- /dev/null +++ b/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_2001_3000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function 
(x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds 
<- 8 + +H2_3_gblup_variances_all=rep(list(list()),cycles) +H2_3_gblup_prediction_all=rep(list(list()),cycles) +H2_3_gfblup_variances_all=rep(list(list()),cycles) +H2_3_gfblup_prediction_all=rep(list(list()),cycles) +H2_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H2_3...") + y=1000000*pheno_df$H2_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H2_3_gblup_variances_all[[r]]<-var + H2_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H2_3_gblup_variances_all[[r]]<-list() + H2_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup 
error") + }) #try catch ends + } + H2_3_gfblup_variances_all[[r]]<-var + H2_3_gfblup_prediction_all[[r]]<-pred + H2_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H2_3_gblup_variances_all,"H2_3_gblup_variances_all_2001_3000.rds") +saveRDS(H2_3_gblup_prediction_all,"H2_3_gblup_prediction_all_2001_3000.rds") +saveRDS(H2_3_gfblup_variances_all,"H2_3_gfblup_variances_all_2001_3000.rds") +saveRDS(H2_3_gfblup_prediction_all,"H2_3_gfblup_prediction_all_2001_3000.rds") +saveRDS(H2_3_gfblup_validate_all,"H2_3_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..5b2e912cf1e91fb4df0a753c337935e5489e43ab --- /dev/null +++ b/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_3001_4000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# Load inputs ------------------------------------------------------------------
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))

# Author's annotation for the three objects below: 7432.
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))

# Author's annotation for the objects below:
# 7297, excluding those GO terms with zero markers.
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))

# G.Rdata is an .Rdata image; it presumably restores G, which is used as the
# GBLUP relationship matrix below -- confirm.
load(file = file.path(data_dir, "G.Rdata"))

# Select the GO terms handled by this script -------------------------------------
# Keep only GO terms with at least one marker, then take terms 3001..4000 of
# the GO-specific matrices (GF) and their counterparts (rGF).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]

# Cross-validation setup -------------------------------------------------------
traits <- ncol(pheno_df)
cycles <- 10
# Fold assignments are reused from the stored GBLUP run instead of being drawn
# again, so no random sampling is needed in this script.
gblup_validate <- readRDS(
  file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds"
)
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
H2_3_gblup_variances_all <- rep(list(list()), cycles)
H2_3_gblup_prediction_all <- rep(list(list()), cycles)
H2_3_gfblup_variances_all <- rep(list(list()), cycles)
H2_3_gfblup_prediction_all <- rep(list(list()), cycles)
H2_3_gfblup_validate_all <- rep(list(list()), cycles)

# The response and design matrix do not depend on the cycle, so build them once.
y <- 1000000 * pheno_df$H2_3
# NOTE(review): the response also appears on the right-hand side of this
# formula; it is kept unchanged to preserve the resulting design matrix, but
# `y ~ 1` looks like the intent -- confirm.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): byrow = TRUE assumes the stored indices were flattened row by
  # row; confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  message("H2_3...")

  # GBLUP: store the results only when both fits succeed. Failures are reported
  # instead of being swallowed; the slots then keep their empty-list default.
  tryCatch(
    {
      gblup_fit <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
      gblup_cv <- greml(
        y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1
      )
      H2_3_gblup_variances_all[[r]] <- gblup_fit
      H2_3_gblup_prediction_all[[r]] <- gblup_cv
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
    }
  )

  # GFBLUP: one two-component model per GO term (GO-specific matrix plus its
  # counterpart from rGF).
  go_variances <- rep(list(list()), length(GF_filtered))
  go_predictions <- rep(list(list()), length(GF_filtered))
  names(go_variances) <- names(GF_filtered)
  names(go_predictions) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # If the first fit succeeds and the second fails, the first is still kept.
    tryCatch(
      {
        go_variances[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
        go_predictions[[k]] <- greml(
          y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1
        )
      },
      error = function(e) {
        message(
          "GFBLUP failed in cycle ", r, " for GO# ", k, ": ",
          conditionMessage(e)
        )
      }
    )
  }
  H2_3_gfblup_variances_all[[r]] <- go_variances
  H2_3_gfblup_prediction_all[[r]] <- go_predictions
  H2_3_gfblup_validate_all[[r]] <- list(validate)
}

# Persist results (relative to the current working directory) --------------------
saveRDS(H2_3_gblup_variances_all, "H2_3_gblup_variances_all_3001_4000.rds")
saveRDS(H2_3_gblup_prediction_all, "H2_3_gblup_prediction_all_3001_4000.rds")
saveRDS(H2_3_gfblup_variances_all, "H2_3_gfblup_variances_all_3001_4000.rds")
saveRDS(H2_3_gfblup_prediction_all, "H2_3_gfblup_prediction_all_3001_4000.rds")
saveRDS(H2_3_gfblup_validate_all, "H2_3_gfblup_validate_all_3001_4000.rds")
+################################################################################################################################## diff --git a/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..0c900a357fa19c491da66a91fde9d9a13cc540d0 --- /dev/null +++ b/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_4001_5000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# Remaining genotype / phenotype inputs.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# GO-term inputs; the trailing numbers are the author's recorded entry counts.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers

# load() restores the objects stored in the file into the workspace.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H2_3_gblup_variances_all=rep(list(list()),cycles) +H2_3_gblup_prediction_all=rep(list(list()),cycles) +H2_3_gfblup_variances_all=rep(list(list()),cycles) +H2_3_gfblup_prediction_all=rep(list(list()),cycles) +H2_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H2_3...") + y=1000000*pheno_df$H2_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H2_3_gblup_variances_all[[r]]<-var + H2_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
H2_3_gblup_variances_all[[r]]<-list() + H2_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H2_3_gfblup_variances_all[[r]]<-var + H2_3_gfblup_prediction_all[[r]]<-pred + H2_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H2_3_gblup_variances_all,"H2_3_gblup_variances_all_4001_5000.rds") +saveRDS(H2_3_gblup_prediction_all,"H2_3_gblup_prediction_all_4001_5000.rds") +saveRDS(H2_3_gfblup_variances_all,"H2_3_gfblup_variances_all_4001_5000.rds") +saveRDS(H2_3_gfblup_prediction_all,"H2_3_gfblup_prediction_all_4001_5000.rds") +saveRDS(H2_3_gfblup_validate_all,"H2_3_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..fe73ba48d121afba83945a43d246d3ae5e1ca4a9 --- /dev/null +++ b/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_5001_6000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim 
<- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds 
<- 8 + +H2_3_gblup_variances_all=rep(list(list()),cycles) +H2_3_gblup_prediction_all=rep(list(list()),cycles) +H2_3_gfblup_variances_all=rep(list(list()),cycles) +H2_3_gfblup_prediction_all=rep(list(list()),cycles) +H2_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H2_3...") + y=1000000*pheno_df$H2_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H2_3_gblup_variances_all[[r]]<-var + H2_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H2_3_gblup_variances_all[[r]]<-list() + H2_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup 
error") + }) #try catch ends + } + H2_3_gfblup_variances_all[[r]]<-var + H2_3_gfblup_prediction_all[[r]]<-pred + H2_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H2_3_gblup_variances_all,"H2_3_gblup_variances_all_5001_6000.rds") +saveRDS(H2_3_gblup_prediction_all,"H2_3_gblup_prediction_all_5001_6000.rds") +saveRDS(H2_3_gfblup_variances_all,"H2_3_gfblup_variances_all_5001_6000.rds") +saveRDS(H2_3_gfblup_prediction_all,"H2_3_gfblup_prediction_all_5001_6000.rds") +saveRDS(H2_3_gfblup_validate_all,"H2_3_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..b24350f446d4081e0bd235352b25b1e1612f89eb --- /dev/null +++ b/code/using_GO/psii/H2_3_gblup_vs_gfblup_using_go_model_6001_7297.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---------------------------------------------------------------------------
# Input data
# ---------------------------------------------------------------------------
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
# load() restores the saved objects into the workspace; the script relies on
# it to provide `G`, which is used as the GBLUP relationship matrix below.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")

# ---------------------------------------------------------------------------
# Keep only GO terms that have at least one marker, then take this job's slice
# ---------------------------------------------------------------------------
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# This script handles the last slice: GO terms 6001..n of the filtered list.
GF_filtered <- GF_filtered[6001:n]
rGF_filtered <- rGF_filtered[6001:n]

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
traits <- ncol(pheno_df)
cycles <- 10
# Validation folds written by the earlier GBLUP run; one entry per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

H2_3_gblup_variances_all <- rep(list(list()), cycles)
H2_3_gblup_prediction_all <- rep(list(list()), cycles)
H2_3_gfblup_variances_all <- rep(list(list()), cycles)
H2_3_gfblup_prediction_all <- rep(list(list()), cycles)
H2_3_gfblup_validate_all <- rep(list(list()), cycles)

# Fit one model twice with greml(): once on all records (variance components)
# and once with the cross-validation sets in `validate`.
#
# y, X      response vector and fixed-effects design matrix passed to greml()
# GRM       list of relationship matrices passed to greml()
# validate  matrix of validation sets passed to greml()
# label     text used to identify the fit if it fails
#
# Returns list(var = <fit>, pred = <cross-validated fit>).  If either call
# errors, BOTH slots are empty lists and the error message is reported.
# The previous inline handlers assigned inside the handler function's own
# scope, so their reset never reached the outer objects and a failed
# prediction fit could leave a variance fit without its matching prediction.
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("greml failed for ", label, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
}

# Response (H2_3 multiplied by 1e6) and design matrix do not change between
# cycles, so they are built once.
y <- 1000000 * pheno_df$H2_3
# NOTE(review): the response `y` also appears on the right-hand side of this
# formula; an intercept-only design is normally written `y ~ 1`.  Kept as-is,
# please confirm what model.matrix() returns here and simplify.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#",r,"..."))

  # Rebuild this cycle's validation matrix from the stored GBLUP folds.
  # Fresh random folds used to be drawn here; that assignment was already
  # commented out, so the leftover seeding/sampling code has been removed.
  # NOTE(review): if gblup_validate[[r]] holds a folds-in-columns matrix (the
  # shape saved below as list(validate)), unlist() flattens it column by
  # column and byrow = TRUE refills it row by row, so the columns are a
  # different partition than the stored one - confirm this is intended.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message ("H2_3...")

  # ------------------------------- GBLUP ----------------------------------
  gblup_fit <- fit_greml_pair(
    y, X, list(G = G), validate,
    label = paste0("GBLUP (cycle ", r, ")")
  )
  H2_3_gblup_variances_all[[r]] <- gblup_fit$var
  H2_3_gblup_prediction_all[[r]] <- gblup_fit$pred

  # ------------------------------- GFBLUP ---------------------------------
  # One two-component model per GO term: element k of GF_filtered together
  # with element k of rGF_filtered.
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#",k,"..."))
    gfblup_fit <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      label = paste0("GFBLUP term ", names(GF_filtered)[k], " (cycle ", r, ")")
    )
    gfblup_var[[k]] <- gfblup_fit$var
    gfblup_pred[[k]] <- gfblup_fit$pred
  }
  H2_3_gfblup_variances_all[[r]] <- gfblup_var
  H2_3_gfblup_prediction_all[[r]] <- gfblup_pred
  H2_3_gfblup_validate_all[[r]] <- list(validate)
}

# Results are written relative to the current working directory.
saveRDS(H2_3_gblup_variances_all, "H2_3_gblup_variances_all_6001_7297.rds")
saveRDS(H2_3_gblup_prediction_all, "H2_3_gblup_prediction_all_6001_7297.rds")
saveRDS(H2_3_gfblup_variances_all, "H2_3_gfblup_variances_all_6001_7297.rds")
saveRDS(H2_3_gfblup_prediction_all, "H2_3_gfblup_prediction_all_6001_7297.rds")
saveRDS(H2_3_gfblup_validate_all, "H2_3_gfblup_validate_all_6001_7297.rds")
+################################################################################################################################## diff --git a/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..8d035dabff466c0ce709f75f65fd3e051a15ec23 --- /dev/null +++ b/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_1001_2000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# Remaining genotype / phenotype inputs.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# GO-term inputs; the trailing numbers are the author's recorded entry counts.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers

# load() restores the objects stored in the file into the workspace.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H3_1_gblup_variances_all=rep(list(list()),cycles) +H3_1_gblup_prediction_all=rep(list(list()),cycles) +H3_1_gfblup_variances_all=rep(list(list()),cycles) +H3_1_gfblup_prediction_all=rep(list(list()),cycles) +H3_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H3_1...") + y=1000000*pheno_df$H3_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H3_1_gblup_variances_all[[r]]<-var + H3_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
H3_1_gblup_variances_all[[r]]<-list() + H3_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H3_1_gfblup_variances_all[[r]]<-var + H3_1_gfblup_prediction_all[[r]]<-pred + H3_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H3_1_gblup_variances_all,"H3_1_gblup_variances_all_1001_2000.rds") +saveRDS(H3_1_gblup_prediction_all,"H3_1_gblup_prediction_all_1001_2000.rds") +saveRDS(H3_1_gfblup_variances_all,"H3_1_gfblup_variances_all_1001_2000.rds") +saveRDS(H3_1_gfblup_prediction_all,"H3_1_gfblup_prediction_all_1001_2000.rds") +saveRDS(H3_1_gfblup_validate_all,"H3_1_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..8eacd62e7af52bb30bd1260f8c79c4d144a4bae9 --- /dev/null +++ b/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_1_1000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- 
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+# Remaining GO-derived inputs; the trailing numbers are the original author's notes.
+go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
+setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
+rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
+nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
+nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
+GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
+rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
+# G is used by the GBLUP fit further down and is not assigned anywhere else in
+# this script, so it is presumably restored by this load() -- confirm.
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+########################################################################################
+# Keep the GO terms whose marker count is positive, then take this script's
+# slice (positions 1 to 1000) of both matrix lists.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+kept_go_terms <- names(go_all_markers_filtered)
+GF_filtered <- GF[kept_go_terms][1:1000]
+rGF_filtered <- rGF[kept_go_terms][1:1000]
+###########################################################################################
+# Run configuration: number of trait columns, number of repeated cycles, the
+# validation sets stored by the GBLUP run, and the number of folds.
+traits <- ncol(pheno_df)
+cycles <- 10
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# Per-cycle result containers, pre-filled with empty lists so that a cycle (or
+# GO term) whose model fit fails keeps an empty placeholder.
+H3_1_gblup_variances_all <- rep(list(list()), cycles)
+H3_1_gblup_prediction_all <- rep(list(list()), cycles)
+H3_1_gfblup_variances_all <- rep(list(list()), cycles)
+H3_1_gfblup_prediction_all <- rep(list(list()), cycles)
+H3_1_gfblup_validate_all <- rep(list(list()), cycles)
+
+# The response and the fixed-effect design do not change between cycles, so
+# they are built once.
+y <- 1000000 * pheno_df$H3_1
+# NOTE(review): the right-hand side names the response itself; confirm whether
+# an intercept-only design (y ~ 1) was intended.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Validation sets come from the stored GBLUP run rather than being re-sampled.
+  # NOTE(review): byrow = TRUE presumes the stored values are in row-major
+  # order; confirm against the script that wrote gblup_validate_all.rds.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("H3_1...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # tryCatch() returns the pair of fits (or NULL on error). Assignments made
+  # inside an error handler only touch the handler's own frame, so the result
+  # is stored here, after the call.
+  gblup_fit <- tryCatch(
+    list(
+      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    ),
+    error = function(e) {
+      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+      NULL
+    }
+  )
+  if (!is.null(gblup_fit)) {
+    H3_1_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+    H3_1_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  }
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    # Entry k of GF_filtered together with entry k of rGF_filtered.
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    gfblup_fit <- tryCatch(
+      list(
+        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
+        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+      ),
+      error = function(e) {
+        message("GFBLUP failed in cycle ", r, ", GO# ", k, ": ", conditionMessage(e))
+        NULL
+      }
+    )
+    # Store the pair only when both fits succeeded; otherwise both stay empty.
+    if (!is.null(gfblup_fit)) {
+      gfblup_var[[k]] <- gfblup_fit[["var"]]
+      gfblup_pred[[k]] <- gfblup_fit[["pred"]]
+    }
+  }
+  H3_1_gfblup_variances_all[[r]] <- gfblup_var
+  H3_1_gfblup_prediction_all[[r]] <- gfblup_pred
+  H3_1_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(H3_1_gblup_variances_all,"H3_1_gblup_variances_all_1_1000.rds")
+saveRDS(H3_1_gblup_prediction_all,"H3_1_gblup_prediction_all_1_1000.rds")
+saveRDS(H3_1_gfblup_variances_all,"H3_1_gfblup_variances_all_1_1000.rds")
+saveRDS(H3_1_gfblup_prediction_all,"H3_1_gfblup_prediction_all_1_1000.rds")
+saveRDS(H3_1_gfblup_validate_all,"H3_1_gfblup_validate_all_1_1000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_2001_3000.R
new file mode 100644
index 0000000000000000000000000000000000000000..be7f880279540a2ba35189dd0ee11748b0f460f6
--- /dev/null
+++ b/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_2001_3000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+###########################################################################################
+message("Loading data...")
+# Genotype / phenotype inputs. Only pheno_df is referenced by the model code
+# later in this script; the other objects in this group are loaded but not
+# used in the visible script.
+MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
+geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
+MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
+W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+# GO-derived inputs; the trailing numbers are the original author's notes.
+go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
+go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
+setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
+rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
+nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
+nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
+GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
+rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
+# G is used by the GBLUP fit below and is not assigned anywhere else in this
+# script, so it is presumably restored by this load() -- confirm.
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+########################################################################################
+# Keep the GO terms whose marker count is positive, then take this script's
+# slice (positions 2001 to 3000) of both matrix lists.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+
+GF_filtered <- GF_filtered[2001:3000]
+rGF_filtered <- rGF_filtered[2001:3000]
+###########################################################################################
+traits <- ncol(pheno_df)
+cycles <- 10
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# Per-cycle result containers, pre-filled with empty lists so that a cycle (or
+# GO term) whose model fit fails keeps an empty placeholder.
+H3_1_gblup_variances_all <- rep(list(list()), cycles)
+H3_1_gblup_prediction_all <- rep(list(list()), cycles)
+H3_1_gfblup_variances_all <- rep(list(list()), cycles)
+H3_1_gfblup_prediction_all <- rep(list(list()), cycles)
+H3_1_gfblup_validate_all <- rep(list(list()), cycles)
+
+# The response and the fixed-effect design do not change between cycles, so
+# they are built once.
+y <- 1000000 * pheno_df$H3_1
+# NOTE(review): the right-hand side names the response itself; confirm whether
+# an intercept-only design (y ~ 1) was intended.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Validation sets come from the stored GBLUP run rather than being re-sampled.
+  # NOTE(review): byrow = TRUE presumes the stored values are in row-major
+  # order; confirm against the script that wrote gblup_validate_all.rds.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("H3_1...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # tryCatch() returns the pair of fits (or NULL on error). Assignments made
+  # inside an error handler only touch the handler's own frame, so the result
+  # is stored here, after the call.
+  gblup_fit <- tryCatch(
+    list(
+      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    ),
+    error = function(e) {
+      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+      NULL
+    }
+  )
+  if (!is.null(gblup_fit)) {
+    H3_1_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+    H3_1_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  }
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    # Entry k of GF_filtered together with entry k of rGF_filtered.
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    gfblup_fit <- tryCatch(
+      list(
+        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
+        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+      ),
+      error = function(e) {
+        message("GFBLUP failed in cycle ", r, ", GO# ", k, ": ", conditionMessage(e))
+        NULL
+      }
+    )
+    # Store the pair only when both fits succeeded; otherwise both stay empty.
+    if (!is.null(gfblup_fit)) {
+      gfblup_var[[k]] <- gfblup_fit[["var"]]
+      gfblup_pred[[k]] <- gfblup_fit[["pred"]]
+    }
+  }
+  H3_1_gfblup_variances_all[[r]] <- gfblup_var
+  H3_1_gfblup_prediction_all[[r]] <- gfblup_pred
+  H3_1_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(H3_1_gblup_variances_all,"H3_1_gblup_variances_all_2001_3000.rds")
+saveRDS(H3_1_gblup_prediction_all,"H3_1_gblup_prediction_all_2001_3000.rds")
+saveRDS(H3_1_gfblup_variances_all,"H3_1_gfblup_variances_all_2001_3000.rds")
+saveRDS(H3_1_gfblup_prediction_all,"H3_1_gfblup_prediction_all_2001_3000.rds")
+saveRDS(H3_1_gfblup_validate_all,"H3_1_gfblup_validate_all_2001_3000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_3001_4000.R
new file mode 100644
index 0000000000000000000000000000000000000000..8033c465dc0bf81057f28a17bd161260dda2cc98
--- /dev/null
+++ b/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_3001_4000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+# Remove leading and trailing whitespace from every element of x.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negated form of `%in%`, built with purrr::negate().
+`%not_in%` <- purrr::negate(`%in%`)
+# TRUE when mypkg is listed in the first column of installed.packages().
+is.installed <- function(mypkg) {
+  mypkg %in% installed.packages()[, 1]
+}
+########################################################################################
+
+# Output files written at the end of the script use relative names, so they
+# land in this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")
+
+# Disabled one-time installation steps, kept for reference.
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+# Remaining genotype / phenotype inputs. Only pheno_df is referenced by the
+# model code later in this script.
+ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
+geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
+MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
+W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+# GO-derived inputs; the trailing numbers are the original author's notes.
+go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
+go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
+setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
+rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
+nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
+nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
+GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
+rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
+# G is used by the GBLUP fit later and is not assigned anywhere else in this
+# script, so it is presumably restored by this load() -- confirm.
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+########################################################################################
+# Keep the GO terms whose marker count is positive, then take this script's
+# slice (positions 3001 to 4000) of both matrix lists.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+
+GF_filtered <- GF_filtered[3001:4000]
+rGF_filtered <- rGF_filtered[3001:4000]
+###########################################################################################
+traits <- ncol(pheno_df)
+cycles <- 10
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# Per-cycle result containers, pre-filled with empty lists so that a cycle (or
+# GO term) whose model fit fails keeps an empty placeholder.
+H3_1_gblup_variances_all <- rep(list(list()), cycles)
+H3_1_gblup_prediction_all <- rep(list(list()), cycles)
+H3_1_gfblup_variances_all <- rep(list(list()), cycles)
+H3_1_gfblup_prediction_all <- rep(list(list()), cycles)
+H3_1_gfblup_validate_all <- rep(list(list()), cycles)
+
+# The response and the fixed-effect design do not change between cycles, so
+# they are built once.
+y <- 1000000 * pheno_df$H3_1
+# NOTE(review): the right-hand side names the response itself; confirm whether
+# an intercept-only design (y ~ 1) was intended.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Validation sets come from the stored GBLUP run rather than being re-sampled.
+  # NOTE(review): byrow = TRUE presumes the stored values are in row-major
+  # order; confirm against the script that wrote gblup_validate_all.rds.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("H3_1...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # tryCatch() returns the pair of fits (or NULL on error). Assignments made
+  # inside an error handler only touch the handler's own frame, so the result
+  # is stored here, after the call.
+  gblup_fit <- tryCatch(
+    list(
+      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    ),
+    error = function(e) {
+      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+      NULL
+    }
+  )
+  if (!is.null(gblup_fit)) {
+    H3_1_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+    H3_1_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  }
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    # Entry k of GF_filtered together with entry k of rGF_filtered.
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    gfblup_fit <- tryCatch(
+      list(
+        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
+        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+      ),
+      error = function(e) {
+        message("GFBLUP failed in cycle ", r, ", GO# ", k, ": ", conditionMessage(e))
+        NULL
+      }
+    )
+    # Store the pair only when both fits succeeded; otherwise both stay empty.
+    if (!is.null(gfblup_fit)) {
+      gfblup_var[[k]] <- gfblup_fit[["var"]]
+      gfblup_pred[[k]] <- gfblup_fit[["pred"]]
+    }
+  }
+  H3_1_gfblup_variances_all[[r]] <- gfblup_var
+  H3_1_gfblup_prediction_all[[r]] <- gfblup_pred
+  H3_1_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(H3_1_gblup_variances_all,"H3_1_gblup_variances_all_3001_4000.rds")
+saveRDS(H3_1_gblup_prediction_all,"H3_1_gblup_prediction_all_3001_4000.rds")
+saveRDS(H3_1_gfblup_variances_all,"H3_1_gfblup_variances_all_3001_4000.rds")
+saveRDS(H3_1_gfblup_prediction_all,"H3_1_gfblup_prediction_all_3001_4000.rds")
+saveRDS(H3_1_gfblup_validate_all,"H3_1_gfblup_validate_all_3001_4000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_4001_5000.R
new file mode 100644
index 0000000000000000000000000000000000000000..7ea139f4cc0c547a6bb6fa0b315a042d3959cf82
--- /dev/null
+++ b/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_4001_5000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
+geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
+setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
+rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
+nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
+nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
+GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
+rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
+# G is used by the GBLUP fit below and is not assigned anywhere else in this
+# script, so it is presumably restored by this load() -- confirm.
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+########################################################################################
+# Keep the GO terms whose marker count is positive, then take this script's
+# slice (positions 4001 to 5000) of both matrix lists.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+
+GF_filtered <- GF_filtered[4001:5000]
+rGF_filtered <- rGF_filtered[4001:5000]
+###########################################################################################
+traits <- ncol(pheno_df)
+cycles <- 10
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# Per-cycle result containers, pre-filled with empty lists so that a cycle (or
+# GO term) whose model fit fails keeps an empty placeholder.
+H3_1_gblup_variances_all <- rep(list(list()), cycles)
+H3_1_gblup_prediction_all <- rep(list(list()), cycles)
+H3_1_gfblup_variances_all <- rep(list(list()), cycles)
+H3_1_gfblup_prediction_all <- rep(list(list()), cycles)
+H3_1_gfblup_validate_all <- rep(list(list()), cycles)
+
+# The response and the fixed-effect design do not change between cycles, so
+# they are built once.
+y <- 1000000 * pheno_df$H3_1
+# NOTE(review): the right-hand side names the response itself; confirm whether
+# an intercept-only design (y ~ 1) was intended.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Validation sets come from the stored GBLUP run rather than being re-sampled.
+  # NOTE(review): byrow = TRUE presumes the stored values are in row-major
+  # order; confirm against the script that wrote gblup_validate_all.rds.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("H3_1...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # tryCatch() returns the pair of fits (or NULL on error). Assignments made
+  # inside an error handler only touch the handler's own frame, so the result
+  # is stored here, after the call.
+  gblup_fit <- tryCatch(
+    list(
+      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    ),
+    error = function(e) {
+      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+      NULL
+    }
+  )
+  if (!is.null(gblup_fit)) {
+    H3_1_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+    H3_1_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  }
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    # Entry k of GF_filtered together with entry k of rGF_filtered.
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    gfblup_fit <- tryCatch(
+      list(
+        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
+        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+      ),
+      error = function(e) {
+        message("GFBLUP failed in cycle ", r, ", GO# ", k, ": ", conditionMessage(e))
+        NULL
+      }
+    )
+    # Store the pair only when both fits succeeded; otherwise both stay empty.
+    if (!is.null(gfblup_fit)) {
+      gfblup_var[[k]] <- gfblup_fit[["var"]]
+      gfblup_pred[[k]] <- gfblup_fit[["pred"]]
+    }
+  }
+  H3_1_gfblup_variances_all[[r]] <- gfblup_var
+  H3_1_gfblup_prediction_all[[r]] <- gfblup_pred
+  H3_1_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(H3_1_gblup_variances_all,"H3_1_gblup_variances_all_4001_5000.rds")
+saveRDS(H3_1_gblup_prediction_all,"H3_1_gblup_prediction_all_4001_5000.rds")
+saveRDS(H3_1_gfblup_variances_all,"H3_1_gfblup_variances_all_4001_5000.rds")
+saveRDS(H3_1_gfblup_prediction_all,"H3_1_gfblup_prediction_all_4001_5000.rds")
+saveRDS(H3_1_gfblup_validate_all,"H3_1_gfblup_validate_all_4001_5000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_5001_6000.R
new file mode 100644
index 0000000000000000000000000000000000000000..e459e91e17b14e511fe4f3794c747b2ebefcff57
--- /dev/null
+++ b/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_5001_6000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+  is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+###########################################################################################
+message("Loading data...")
+# Genotype / phenotype inputs. Only pheno_df is referenced by the model code
+# below; the other objects in this group are loaded but not used in this script.
+MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
+geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
+MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
+W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+# GO-derived inputs; the trailing numbers are the original author's notes.
+go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
+go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
+go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
+markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") # 7297 excluding those go terms with zero markers
+setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") # 7297 excluding those go terms with zero markers
+rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") # 7297 excluding those go terms with zero markers
+nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") # 7297 excluding those go terms with zero markers
+nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") # 7297 excluding those go terms with zero markers
+GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") # 7297 excluding those go terms with zero markers
+rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") # 7297 excluding those go terms with zero markers
+# G is used by the GBLUP fit below and is not assigned anywhere else in this
+# script, so it is presumably restored by this load() -- confirm.
+load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+########################################################################################
+# Keep the GO terms whose marker count is positive, then take this script's
+# slice (positions 5001 to 6000) of both matrix lists.
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+
+GF_filtered <- GF_filtered[5001:6000]
+rGF_filtered <- rGF_filtered[5001:6000]
+###########################################################################################
+traits <- ncol(pheno_df)
+cycles <- 10
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# Per-cycle result containers, pre-filled with empty lists so that a cycle (or
+# GO term) whose model fit fails keeps an empty placeholder.
+H3_1_gblup_variances_all <- rep(list(list()), cycles)
+H3_1_gblup_prediction_all <- rep(list(list()), cycles)
+H3_1_gfblup_variances_all <- rep(list(list()), cycles)
+H3_1_gfblup_prediction_all <- rep(list(list()), cycles)
+H3_1_gfblup_validate_all <- rep(list(list()), cycles)
+
+# The response and the fixed-effect design do not change between cycles, so
+# they are built once.
+y <- 1000000 * pheno_df$H3_1
+# NOTE(review): the right-hand side names the response itself; confirm whether
+# an intercept-only design (y ~ 1) was intended.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Validation sets come from the stored GBLUP run rather than being re-sampled.
+  # NOTE(review): byrow = TRUE presumes the stored values are in row-major
+  # order; confirm against the script that wrote gblup_validate_all.rds.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message ("H3_1...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # tryCatch() returns the pair of fits (or NULL on error). Assignments made
+  # inside an error handler only touch the handler's own frame, so the result
+  # is stored here, after the call.
+  gblup_fit <- tryCatch(
+    list(
+      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    ),
+    error = function(e) {
+      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+      NULL
+    }
+  )
+  if (!is.null(gblup_fit)) {
+    H3_1_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+    H3_1_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  }
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    # Entry k of GF_filtered together with entry k of rGF_filtered.
+    grm_k <- c(GF_filtered[k], rGF_filtered[k])
+    gfblup_fit <- tryCatch(
+      list(
+        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
+        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
+      ),
+      error = function(e) {
+        message("GFBLUP failed in cycle ", r, ", GO# ", k, ": ", conditionMessage(e))
+        NULL
+      }
+    )
+    # Store the pair only when both fits succeeded; otherwise both stay empty.
+    if (!is.null(gfblup_fit)) {
+      gfblup_var[[k]] <- gfblup_fit[["var"]]
+      gfblup_pred[[k]] <- gfblup_fit[["pred"]]
+    }
+  }
+  H3_1_gfblup_variances_all[[r]] <- gfblup_var
+  H3_1_gfblup_prediction_all[[r]] <- gfblup_pred
+  H3_1_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(H3_1_gblup_variances_all,"H3_1_gblup_variances_all_5001_6000.rds")
+saveRDS(H3_1_gblup_prediction_all,"H3_1_gblup_prediction_all_5001_6000.rds")
+saveRDS(H3_1_gfblup_variances_all,"H3_1_gfblup_variances_all_5001_6000.rds")
+saveRDS(H3_1_gfblup_prediction_all,"H3_1_gfblup_prediction_all_5001_6000.rds")
+saveRDS(H3_1_gfblup_validate_all,"H3_1_gfblup_validate_all_5001_6000.rds")
+
+################################################################################################################################## diff --git a/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..b640594d0a57ea44d08d1adf24947ade0e9f7b2e --- /dev/null +++ b/code/using_GO/psii/H3_1_gblup_vs_gfblup_using_go_model_6001_7297.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# Remaining input objects for this run. Each readRDS() result is bound to a
# variable named after the file it is read from.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# GO-term inputs. Author's note on these three objects: 7432.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# Author's note on these seven objects: 7297, excluding those GO terms with
# zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# load() restores the saved objects under the names they were stored with; it
# runs after all readRDS() calls, as in the original ordering.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[6001:n] +rGF_filtered<-rGF_filtered[6001:n] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H3_1_gblup_variances_all=rep(list(list()),cycles) +H3_1_gblup_prediction_all=rep(list(list()),cycles) +H3_1_gfblup_variances_all=rep(list(list()),cycles) +H3_1_gfblup_prediction_all=rep(list(list()),cycles) +H3_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H3_1...") + y=1000000*pheno_df$H3_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H3_1_gblup_variances_all[[r]]<-var + H3_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
H3_1_gblup_variances_all[[r]]<-list() + H3_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H3_1_gfblup_variances_all[[r]]<-var + H3_1_gfblup_prediction_all[[r]]<-pred + H3_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H3_1_gblup_variances_all,"H3_1_gblup_variances_all_6001_7297.rds") +saveRDS(H3_1_gblup_prediction_all,"H3_1_gblup_prediction_all_6001_7297.rds") +saveRDS(H3_1_gfblup_variances_all,"H3_1_gfblup_variances_all_6001_7297.rds") +saveRDS(H3_1_gfblup_prediction_all,"H3_1_gfblup_prediction_all_6001_7297.rds") +saveRDS(H3_1_gfblup_validate_all,"H3_1_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..e43eb5f6e21ea38237c6225c44627cef655e767e --- /dev/null +++ b/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_1001_2000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim 
<- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds 
<- 8 + +H3_2_gblup_variances_all=rep(list(list()),cycles) +H3_2_gblup_prediction_all=rep(list(list()),cycles) +H3_2_gfblup_variances_all=rep(list(list()),cycles) +H3_2_gfblup_prediction_all=rep(list(list()),cycles) +H3_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H3_2...") + y=1000000*pheno_df$H3_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H3_2_gblup_variances_all[[r]]<-var + H3_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H3_2_gblup_variances_all[[r]]<-list() + H3_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup 
error") + }) #try catch ends + } + H3_2_gfblup_variances_all[[r]]<-var + H3_2_gfblup_prediction_all[[r]]<-pred + H3_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H3_2_gblup_variances_all,"H3_2_gblup_variances_all_1001_2000.rds") +saveRDS(H3_2_gblup_prediction_all,"H3_2_gblup_prediction_all_1001_2000.rds") +saveRDS(H3_2_gfblup_variances_all,"H3_2_gfblup_variances_all_1001_2000.rds") +saveRDS(H3_2_gfblup_prediction_all,"H3_2_gfblup_prediction_all_1001_2000.rds") +saveRDS(H3_2_gfblup_validate_all,"H3_2_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..136055eeaf89590715a9a31ccdc98d44ebe8e260 --- /dev/null +++ b/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_1_1000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
library(org.At.tair.db)
library(qgg)
library(dplyr)

# ---- Load inputs -----------------------------------------------------------
# Every object the script loaded before is still loaded, under the same name
# and in the same order.
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))

# Author's note on the next three objects: 7432.
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))

# Author's note on the next seven objects: 7297, excluding those GO terms with
# zero markers.
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))

# load() restores objects under the names they were saved with, so it stays
# last, exactly as before. Presumably this is where `G` (used by the GBLUP fit
# below) comes from -- TODO confirm against the contents of G.Rdata.
load(file.path(data_dir, "G.Rdata"))

# ---- Select the GO terms handled by this job -------------------------------
# Keep only GO terms with at least one marker, then take positions 1:1000.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
go_terms <- names(go_all_markers_filtered)
GF_filtered <- GF[go_terms][1:1000]
rGF_filtered <- rGF[go_terms][1:1000]

# ---- Run settings ----------------------------------------------------------
traits <- ncol(pheno_df)
cycles <- 10
n_folds <- 8
# Validation sets saved by an earlier GBLUP run; one entry per cycle is reused
# below instead of drawing new folds.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")

# One slot per cycle. A slot that is still an empty list after the run means
# the corresponding fit failed.
H3_2_gblup_variances_all <- rep(list(list()), cycles)
H3_2_gblup_prediction_all <- rep(list(list()), cycles)
H3_2_gfblup_variances_all <- rep(list(list()), cycles)
H3_2_gfblup_prediction_all <- rep(list(list()), cycles)
H3_2_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- 1000000 * pheno_df$H3_2
# NOTE(review): the formula has the response on its right-hand side; presumably
# an intercept-only design matrix is intended -- confirm. Left unchanged.
fm <- y ~ 1 * y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))

  # The former per-cycle set.seed()/sample() fold generation was removed: its
  # result was never used, because the folds come from gblup_validate[[r]].
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message("H3_2...")

  # ---- GBLUP ----
  # Both fits must succeed for either result to be stored (same as before).
  # The old handler assigned inside its own function scope, which had no
  # effect; failures are now reported instead of being silently dropped.
  gblup_fit <- tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      prediction = greml(
        y = y, X = X, GRM = list(G = G),
        validate = validate, ncores = 1
      )
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    H3_2_gblup_variances_all[[r]] <- gblup_fit$variances
    H3_2_gblup_prediction_all[[r]] <- gblup_fit$prediction
  }

  # ---- GFBLUP ----
  # One fit pair per GO term: the term's matrix from GF plus the matching
  # entry from rGF.
  gf_variances <- setNames(
    rep(list(list()), length(GF_filtered)),
    names(GF_filtered)
  )
  gf_prediction <- gf_variances

  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch(
      {
        # If the first fit fails the second is skipped; if only the second
        # fails, the variance fit is still kept (same as before).
        gf_variances[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
        gf_prediction[[k]] <- greml(
          y = y, X = X, GRM = grm_pair,
          validate = validate, ncores = 1
        )
      },
      error = function(e) {
        message(
          "GFBLUP failed for GO# ", k, " (", names(GF_filtered)[k],
          ") in cycle ", r, ": ", conditionMessage(e)
        )
      }
    )
  }

  H3_2_gfblup_variances_all[[r]] <- gf_variances
  H3_2_gfblup_prediction_all[[r]] <- gf_prediction
  H3_2_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Save results (relative to the current working directory) --------------
saveRDS(H3_2_gblup_variances_all, "H3_2_gblup_variances_all_1_1000.rds")
saveRDS(H3_2_gblup_prediction_all, "H3_2_gblup_prediction_all_1_1000.rds")
saveRDS(H3_2_gfblup_variances_all, "H3_2_gfblup_variances_all_1_1000.rds")
saveRDS(H3_2_gfblup_prediction_all, "H3_2_gfblup_prediction_all_1_1000.rds")
saveRDS(H3_2_gfblup_validate_all, "H3_2_gfblup_validate_all_1_1000.rds")
+################################################################################################################################## diff --git a/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..63d03f4d5cab62ac2911bc50afb91a309c7f9354 --- /dev/null +++ b/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_2001_3000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# Remaining input objects for this run. Each readRDS() result is bound to a
# variable named after the file it is read from.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# GO-term inputs. Author's note on these three objects: 7432.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# Author's note on these seven objects: 7297, excluding those GO terms with
# zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# load() restores the saved objects under the names they were stored with; it
# runs after all readRDS() calls, as in the original ordering.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H3_2_gblup_variances_all=rep(list(list()),cycles) +H3_2_gblup_prediction_all=rep(list(list()),cycles) +H3_2_gfblup_variances_all=rep(list(list()),cycles) +H3_2_gfblup_prediction_all=rep(list(list()),cycles) +H3_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H3_2...") + y=1000000*pheno_df$H3_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H3_2_gblup_variances_all[[r]]<-var + H3_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
H3_2_gblup_variances_all[[r]]<-list() + H3_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H3_2_gfblup_variances_all[[r]]<-var + H3_2_gfblup_prediction_all[[r]]<-pred + H3_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H3_2_gblup_variances_all,"H3_2_gblup_variances_all_2001_3000.rds") +saveRDS(H3_2_gblup_prediction_all,"H3_2_gblup_prediction_all_2001_3000.rds") +saveRDS(H3_2_gfblup_variances_all,"H3_2_gfblup_variances_all_2001_3000.rds") +saveRDS(H3_2_gfblup_prediction_all,"H3_2_gfblup_prediction_all_2001_3000.rds") +saveRDS(H3_2_gfblup_validate_all,"H3_2_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..9cb4239e7618cf2e1f9631fd3d04df391dc5b274 --- /dev/null +++ b/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_3001_4000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim 
<- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds 
<- 8 + +H3_2_gblup_variances_all=rep(list(list()),cycles) +H3_2_gblup_prediction_all=rep(list(list()),cycles) +H3_2_gfblup_variances_all=rep(list(list()),cycles) +H3_2_gfblup_prediction_all=rep(list(list()),cycles) +H3_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H3_2...") + y=1000000*pheno_df$H3_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H3_2_gblup_variances_all[[r]]<-var + H3_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H3_2_gblup_variances_all[[r]]<-list() + H3_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup 
error") + }) #try catch ends + } + H3_2_gfblup_variances_all[[r]]<-var + H3_2_gfblup_prediction_all[[r]]<-pred + H3_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H3_2_gblup_variances_all,"H3_2_gblup_variances_all_3001_4000.rds") +saveRDS(H3_2_gblup_prediction_all,"H3_2_gblup_prediction_all_3001_4000.rds") +saveRDS(H3_2_gfblup_variances_all,"H3_2_gfblup_variances_all_3001_4000.rds") +saveRDS(H3_2_gfblup_prediction_all,"H3_2_gfblup_prediction_all_3001_4000.rds") +saveRDS(H3_2_gfblup_validate_all,"H3_2_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..3f789d085ddaa2d81f166ee762b58cb06eae61bc --- /dev/null +++ b/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_4001_5000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Load inputs -------------------------------------------------------------
message("Loading data...")

# Every prior/genotype input sits under one directory tree, so the paths are
# built with file.path() instead of repeating the absolute prefix per line.
priors_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors"
read_prior <- function(subdir, file_name) {
  readRDS(file.path(priors_dir, subdir, file_name))
}

# NOTE(review): further down this block only references pheno_df, the three
# go_all_* objects, GF, rGF and G. The other objects are still loaded so the
# resulting workspace is the same as before; drop them if nothing needs them.
MAC_matrix_with_header <- read_prior("data", "MAC_matrix_with_header.rds")
accessions <- read_prior("data", "accessions.rds")
all_nucl_genes_bed <- read_prior("data", "all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- read_prior("data", "ath_all_new_maf_ldpruned_map.rds")
Pheno <- read_prior("data", "Pheno.rds")
geno_pheno <- read_prior("data", "geno_pheno.rds")
MAC_df <- read_prior("data", "MAC_df.rds")
pheno_df <- read_prior("data", "pheno_df.rds")
W <- read_prior("data", "W.rds")
go_all_genes_number <- read_prior("go", "go_all_genes_number.rds") # 7432
go_all_markers_number <- read_prior("go", "go_all_markers_number.rds") # 7432
go_all_markers <- read_prior("go", "go_all_markers.rds") # 7432
# The objects below: 7297, excluding those GO terms with zero markers.
markerSets <- read_prior("go", "markerSets.rds")
setsGF <- read_prior("go", "setsGF.rds")
rsetsGF <- read_prior("go", "rsetsGF.rds")
nsets <- read_prior("go", "nsets.rds")
nsnps <- read_prior("go", "nsnps.rds")
GF <- read_prior("go", "GF.rds")
rGF <- read_prior("go", "rGF.rds")
# Presumably defines `G`, which the GBLUP fit below uses -- TODO confirm.
load(file.path(priors_dir, "data", "G.Rdata"))

# ---- Keep GO terms that have markers, then take this job's slice --------------
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]

# ---- Run configuration ---------------------------------------------------------
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets are read from the earlier GBLUP run instead of being
# re-sampled here; one entry per cycle is consumed in the loop below.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

H3_2_gblup_variances_all <- rep(list(list()), cycles)
H3_2_gblup_prediction_all <- rep(list(list()), cycles)
H3_2_gfblup_variances_all <- rep(list(list()), cycles)
H3_2_gfblup_prediction_all <- rep(list(list()), cycles)
H3_2_gfblup_validate_all <- rep(list(list()), cycles)

# ---- Cross-validation cycles ---------------------------------------------------
for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))

  # The former time-based set.seed()/sample() fold draw was dead code: its
  # result was never used because `validate` comes from gblup_validate.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message("H3_2...")
  y <- 1000000 * pheno_df$H3_2
  # NOTE(review): the response also appears on the right-hand side of this
  # formula; kept unchanged -- confirm the resulting design matrix is intended.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)

  # ---- GBLUP ----
  # The fits are returned from tryCatch() and stored afterwards. Assignments
  # made inside an error handler only change the handler's own frame, so the
  # previous in-handler resets never reached the result lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  H3_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  H3_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  # ---- GFBLUP: one two-component model per GO term ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)

  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    gfblup_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        # If either fit fails, both entries are left empty for this GO term.
        list(var = list(), pred = list())
      }
    )
    var[[k]] <- gfblup_fit[["var"]]
    pred[[k]] <- gfblup_fit[["pred"]]
  }

  H3_2_gfblup_variances_all[[r]] <- var
  H3_2_gfblup_prediction_all[[r]] <- pred
  H3_2_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Persist results (relative to the current working directory) --------------
saveRDS(H3_2_gblup_variances_all, "H3_2_gblup_variances_all_4001_5000.rds")
saveRDS(H3_2_gblup_prediction_all, "H3_2_gblup_prediction_all_4001_5000.rds")
saveRDS(H3_2_gfblup_variances_all, "H3_2_gfblup_variances_all_4001_5000.rds")
saveRDS(H3_2_gfblup_prediction_all, "H3_2_gfblup_prediction_all_4001_5000.rds")
saveRDS(H3_2_gfblup_validate_all, "H3_2_gfblup_validate_all_4001_5000.rds")
+################################################################################################################################## diff --git a/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..84224e3aef7362bb1c9c486416f65cc03e17ccd2 --- /dev/null +++ b/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_5001_6000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# Read the remaining serialized inputs from the priors/data directory.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# Read the GO-term objects from the priors/go directory.
# The first three: 7432.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# The rest: 7297, excluding those GO terms with zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# Restore the objects stored in G.Rdata into the workspace.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H3_2_gblup_variances_all=rep(list(list()),cycles) +H3_2_gblup_prediction_all=rep(list(list()),cycles) +H3_2_gfblup_variances_all=rep(list(list()),cycles) +H3_2_gfblup_prediction_all=rep(list(list()),cycles) +H3_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H3_2...") + y=1000000*pheno_df$H3_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H3_2_gblup_variances_all[[r]]<-var + H3_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
H3_2_gblup_variances_all[[r]]<-list() + H3_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H3_2_gfblup_variances_all[[r]]<-var + H3_2_gfblup_prediction_all[[r]]<-pred + H3_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H3_2_gblup_variances_all,"H3_2_gblup_variances_all_5001_6000.rds") +saveRDS(H3_2_gblup_prediction_all,"H3_2_gblup_prediction_all_5001_6000.rds") +saveRDS(H3_2_gfblup_variances_all,"H3_2_gfblup_variances_all_5001_6000.rds") +saveRDS(H3_2_gfblup_prediction_all,"H3_2_gfblup_prediction_all_5001_6000.rds") +saveRDS(H3_2_gfblup_validate_all,"H3_2_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..0aba0c7d49aa739b5b469a18dd55421b031ea3e6 --- /dev/null +++ b/code/using_GO/psii/H3_2_gblup_vs_gfblup_using_go_model_6001_7297.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim 
<- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
# Read the GO-term objects from the priors/go directory.
# The first two: 7432.
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# The rest: 7297, excluding those GO terms with zero markers.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# Restore the objects stored in G.Rdata into the workspace.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")

# Keep only GO terms with at least one marker; `n` is how many remain.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# This job handles the last slice, from term 6001 up to `n`.
GF_filtered <- GF_filtered[6001:n]
rGF_filtered <- rGF_filtered[6001:n]

# Run configuration.
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets saved by the earlier GBLUP run.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8
+ +H3_2_gblup_variances_all=rep(list(list()),cycles) +H3_2_gblup_prediction_all=rep(list(list()),cycles) +H3_2_gfblup_variances_all=rep(list(list()),cycles) +H3_2_gfblup_prediction_all=rep(list(list()),cycles) +H3_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H3_2...") + y=1000000*pheno_df$H3_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H3_2_gblup_variances_all[[r]]<-var + H3_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H3_2_gblup_variances_all[[r]]<-list() + H3_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + 
}) #try catch ends + } + H3_2_gfblup_variances_all[[r]]<-var + H3_2_gfblup_prediction_all[[r]]<-pred + H3_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H3_2_gblup_variances_all,"H3_2_gblup_variances_all_6001_7297.rds") +saveRDS(H3_2_gblup_prediction_all,"H3_2_gblup_prediction_all_6001_7297.rds") +saveRDS(H3_2_gfblup_variances_all,"H3_2_gfblup_variances_all_6001_7297.rds") +saveRDS(H3_2_gfblup_prediction_all,"H3_2_gfblup_prediction_all_6001_7297.rds") +saveRDS(H3_2_gfblup_validate_all,"H3_2_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..08fb86f1819c249e5a8463385e77c7e9532e0f40 --- /dev/null +++ b/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_1001_2000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Load inputs -------------------------------------------------------------
message("Loading data...")

# Every prior/genotype input sits under one directory tree, so the paths are
# built with file.path() instead of repeating the absolute prefix per line.
priors_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors"
read_prior <- function(subdir, file_name) {
  readRDS(file.path(priors_dir, subdir, file_name))
}

# NOTE(review): further down this block only references pheno_df, the three
# go_all_* objects, GF, rGF and G. The other objects are still loaded so the
# resulting workspace is the same as before; drop them if nothing needs them.
MAC_matrix_with_header <- read_prior("data", "MAC_matrix_with_header.rds")
accessions <- read_prior("data", "accessions.rds")
all_nucl_genes_bed <- read_prior("data", "all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- read_prior("data", "ath_all_new_maf_ldpruned_map.rds")
Pheno <- read_prior("data", "Pheno.rds")
geno_pheno <- read_prior("data", "geno_pheno.rds")
MAC_df <- read_prior("data", "MAC_df.rds")
pheno_df <- read_prior("data", "pheno_df.rds")
W <- read_prior("data", "W.rds")
go_all_genes_number <- read_prior("go", "go_all_genes_number.rds") # 7432
go_all_markers_number <- read_prior("go", "go_all_markers_number.rds") # 7432
go_all_markers <- read_prior("go", "go_all_markers.rds") # 7432
# The objects below: 7297, excluding those GO terms with zero markers.
markerSets <- read_prior("go", "markerSets.rds")
setsGF <- read_prior("go", "setsGF.rds")
rsetsGF <- read_prior("go", "rsetsGF.rds")
nsets <- read_prior("go", "nsets.rds")
nsnps <- read_prior("go", "nsnps.rds")
GF <- read_prior("go", "GF.rds")
rGF <- read_prior("go", "rGF.rds")
# Presumably defines `G`, which the GBLUP fit below uses -- TODO confirm.
load(file.path(priors_dir, "data", "G.Rdata"))

# ---- Keep GO terms that have markers, then take this job's slice --------------
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[1001:2000]
rGF_filtered <- rGF_filtered[1001:2000]

# ---- Run configuration ---------------------------------------------------------
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets are read from the earlier GBLUP run instead of being
# re-sampled here; one entry per cycle is consumed in the loop below.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

H3_3_gblup_variances_all <- rep(list(list()), cycles)
H3_3_gblup_prediction_all <- rep(list(list()), cycles)
H3_3_gfblup_variances_all <- rep(list(list()), cycles)
H3_3_gfblup_prediction_all <- rep(list(list()), cycles)
H3_3_gfblup_validate_all <- rep(list(list()), cycles)

# ---- Cross-validation cycles ---------------------------------------------------
for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))

  # The former time-based set.seed()/sample() fold draw was dead code: its
  # result was never used because `validate` comes from gblup_validate.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message("H3_3...")
  y <- 1000000 * pheno_df$H3_3
  # NOTE(review): the response also appears on the right-hand side of this
  # formula; kept unchanged -- confirm the resulting design matrix is intended.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)

  # ---- GBLUP ----
  # The fits are returned from tryCatch() and stored afterwards. Assignments
  # made inside an error handler only change the handler's own frame, so the
  # previous in-handler resets never reached the result lists.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  H3_3_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  H3_3_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  # ---- GFBLUP: one two-component model per GO term ----
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)

  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    gfblup_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        # If either fit fails, both entries are left empty for this GO term.
        list(var = list(), pred = list())
      }
    )
    var[[k]] <- gfblup_fit[["var"]]
    pred[[k]] <- gfblup_fit[["pred"]]
  }

  H3_3_gfblup_variances_all[[r]] <- var
  H3_3_gfblup_prediction_all[[r]] <- pred
  H3_3_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Persist results (relative to the current working directory) --------------
saveRDS(H3_3_gblup_variances_all, "H3_3_gblup_variances_all_1001_2000.rds")
saveRDS(H3_3_gblup_prediction_all, "H3_3_gblup_prediction_all_1001_2000.rds")
saveRDS(H3_3_gfblup_variances_all, "H3_3_gfblup_variances_all_1001_2000.rds")
saveRDS(H3_3_gfblup_prediction_all, "H3_3_gfblup_prediction_all_1001_2000.rds")
saveRDS(H3_3_gfblup_validate_all, "H3_3_gfblup_validate_all_1001_2000.rds")
+################################################################################################################################## diff --git a/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..8633fe2a1629fd666b7113c12558942ff37dc380 --- /dev/null +++ b/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_1_1000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") 
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H3_3_gblup_variances_all=rep(list(list()),cycles) +H3_3_gblup_prediction_all=rep(list(list()),cycles) +H3_3_gfblup_variances_all=rep(list(list()),cycles) +H3_3_gfblup_prediction_all=rep(list(list()),cycles) +H3_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H3_3...") + y=1000000*pheno_df$H3_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H3_3_gblup_variances_all[[r]]<-var + H3_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
H3_3_gblup_variances_all[[r]]<-list() + H3_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H3_3_gfblup_variances_all[[r]]<-var + H3_3_gfblup_prediction_all[[r]]<-pred + H3_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H3_3_gblup_variances_all,"H3_3_gblup_variances_all_1_1000.rds") +saveRDS(H3_3_gblup_prediction_all,"H3_3_gblup_prediction_all_1_1000.rds") +saveRDS(H3_3_gfblup_variances_all,"H3_3_gfblup_variances_all_1_1000.rds") +saveRDS(H3_3_gfblup_prediction_all,"H3_3_gfblup_prediction_all_1_1000.rds") +saveRDS(H3_3_gfblup_validate_all,"H3_3_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..58a371a9a864d81a84d73807673a15d1261d54ce --- /dev/null +++ b/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_2001_3000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function 
(x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds 
<- 8 + +H3_3_gblup_variances_all=rep(list(list()),cycles) +H3_3_gblup_prediction_all=rep(list(list()),cycles) +H3_3_gfblup_variances_all=rep(list(list()),cycles) +H3_3_gfblup_prediction_all=rep(list(list()),cycles) +H3_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H3_3...") + y=1000000*pheno_df$H3_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H3_3_gblup_variances_all[[r]]<-var + H3_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H3_3_gblup_variances_all[[r]]<-list() + H3_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup 
error") + }) #try catch ends + } + H3_3_gfblup_variances_all[[r]]<-var + H3_3_gfblup_prediction_all[[r]]<-pred + H3_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H3_3_gblup_variances_all,"H3_3_gblup_variances_all_2001_3000.rds") +saveRDS(H3_3_gblup_prediction_all,"H3_3_gblup_prediction_all_2001_3000.rds") +saveRDS(H3_3_gfblup_variances_all,"H3_3_gfblup_variances_all_2001_3000.rds") +saveRDS(H3_3_gfblup_prediction_all,"H3_3_gfblup_prediction_all_2001_3000.rds") +saveRDS(H3_3_gfblup_validate_all,"H3_3_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..1ab717deb3d63ea14f1b2b9667681c64606c5974 --- /dev/null +++ b/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_3001_4000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
+library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms 
with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H3_3_gblup_variances_all=rep(list(list()),cycles) +H3_3_gblup_prediction_all=rep(list(list()),cycles) +H3_3_gfblup_variances_all=rep(list(list()),cycles) +H3_3_gfblup_prediction_all=rep(list(list()),cycles) +H3_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H3_3...") + y=1000000*pheno_df$H3_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H3_3_gblup_variances_all[[r]]<-var + H3_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H3_3_gblup_variances_all[[r]]<-list() + H3_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H3_3_gfblup_variances_all[[r]]<-var + H3_3_gfblup_prediction_all[[r]]<-pred + H3_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H3_3_gblup_variances_all,"H3_3_gblup_variances_all_3001_4000.rds") +saveRDS(H3_3_gblup_prediction_all,"H3_3_gblup_prediction_all_3001_4000.rds") +saveRDS(H3_3_gfblup_variances_all,"H3_3_gfblup_variances_all_3001_4000.rds") +saveRDS(H3_3_gfblup_prediction_all,"H3_3_gfblup_prediction_all_3001_4000.rds") +saveRDS(H3_3_gfblup_validate_all,"H3_3_gfblup_validate_all_3001_4000.rds") + 
+################################################################################################################################## diff --git a/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..5aaea33e4dda6d30151a227f04a48f76595186d1 --- /dev/null +++ b/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_4001_5000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") 
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H3_3_gblup_variances_all=rep(list(list()),cycles) +H3_3_gblup_prediction_all=rep(list(list()),cycles) +H3_3_gfblup_variances_all=rep(list(list()),cycles) +H3_3_gfblup_prediction_all=rep(list(list()),cycles) +H3_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H3_3...") + y=1000000*pheno_df$H3_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H3_3_gblup_variances_all[[r]]<-var + H3_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
H3_3_gblup_variances_all[[r]]<-list() + H3_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H3_3_gfblup_variances_all[[r]]<-var + H3_3_gfblup_prediction_all[[r]]<-pred + H3_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H3_3_gblup_variances_all,"H3_3_gblup_variances_all_4001_5000.rds") +saveRDS(H3_3_gblup_prediction_all,"H3_3_gblup_prediction_all_4001_5000.rds") +saveRDS(H3_3_gfblup_variances_all,"H3_3_gfblup_variances_all_4001_5000.rds") +saveRDS(H3_3_gfblup_prediction_all,"H3_3_gfblup_prediction_all_4001_5000.rds") +saveRDS(H3_3_gfblup_validate_all,"H3_3_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..24df785dc7c091a2e503553361e6c85fb98063d1 --- /dev/null +++ b/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_5001_6000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim 
<- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds 
<- 8 + +H3_3_gblup_variances_all=rep(list(list()),cycles) +H3_3_gblup_prediction_all=rep(list(list()),cycles) +H3_3_gfblup_variances_all=rep(list(list()),cycles) +H3_3_gfblup_prediction_all=rep(list(list()),cycles) +H3_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H3_3...") + y=1000000*pheno_df$H3_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H3_3_gblup_variances_all[[r]]<-var + H3_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H3_3_gblup_variances_all[[r]]<-list() + H3_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup 
error") + }) #try catch ends + } + H3_3_gfblup_variances_all[[r]]<-var + H3_3_gfblup_prediction_all[[r]]<-pred + H3_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H3_3_gblup_variances_all,"H3_3_gblup_variances_all_5001_6000.rds") +saveRDS(H3_3_gblup_prediction_all,"H3_3_gblup_prediction_all_5001_6000.rds") +saveRDS(H3_3_gfblup_variances_all,"H3_3_gfblup_variances_all_5001_6000.rds") +saveRDS(H3_3_gfblup_prediction_all,"H3_3_gfblup_prediction_all_5001_6000.rds") +saveRDS(H3_3_gfblup_validate_all,"H3_3_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..1ba87be9967058f626ca3e9b25aa79396b7aae4f --- /dev/null +++ b/code/using_GO/psii/H3_3_gblup_vs_gfblup_using_go_model_6001_7297.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
# GBLUP vs GFBLUP for trait H3_3 on GO terms 6001..n of the filtered GO list.
# For each cycle the script fits a single-GRM model (G) and, per GO term, a
# two-GRM model (feature GRM + remainder GRM), then stores variance fits and
# cross-validated predictions as .rds files in the working directory.
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
markerSets <- readRDS(file.path(go_dir, "markerSets.rds")) #7297 excluding those go terms with zero markers
setsGF <- readRDS(file.path(go_dir, "setsGF.rds")) #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds")) #7297 excluding those go terms with zero markers
nsets <- readRDS(file.path(go_dir, "nsets.rds")) #7297 excluding those go terms with zero markers
nsnps <- readRDS(file.path(go_dir, "nsnps.rds")) #7297 excluding those go terms with zero markers
GF <- readRDS(file.path(go_dir, "GF.rds")) #7297 excluding those go terms with zero markers
rGF <- readRDS(file.path(go_dir, "rGF.rds")) #7297 excluding those go terms with zero markers
# G is used below and is not assigned anywhere else in this script, so it is
# expected to be one of the objects restored from G.Rdata.
load(file.path(data_dir, "G.Rdata"))
########################################################################################
# Keep only GO terms that have at least one marker, then take this job's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[6001:n]
rGF_filtered <- rGF_filtered[6001:n]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Fold assignments saved by the GBLUP run are reused here instead of drawing
# new random folds.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

H3_3_gblup_variances_all <- rep(list(list()), cycles)
H3_3_gblup_prediction_all <- rep(list(list()), cycles)
H3_3_gfblup_variances_all <- rep(list(list()), cycles)
H3_3_gfblup_prediction_all <- rep(list(list()), cycles)
H3_3_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- 1000000 * pheno_df$H3_3
# NOTE(review): the formula has y on both sides; presumably an intercept-only
# design was intended -- confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): the stored folds are flattened and refilled row-wise;
  # whether this reproduces the GBLUP fold columns depends on how
  # gblup_validate_all.rds was written -- confirm.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message ("H3_3...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The braced expression runs in the script's own environment, so the
  # assignments inside it persist. On error the pre-initialised empty list for
  # this cycle is left untouched and the failure is reported.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H3_3_gblup_variances_all[[r]] <- gblup_var
    H3_3_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  }) #try catch ends
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  # seq_along() skips the loop entirely when the slice is empty.
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # A failed fit leaves the empty-list placeholder for this GO term in place.
    tryCatch({
      gfblup_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
      gfblup_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
    }) #try catch ends
  }
  H3_3_gfblup_variances_all[[r]] <- gfblup_var
  H3_3_gfblup_prediction_all[[r]] <- gfblup_pred
  H3_3_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(H3_3_gblup_variances_all, "H3_3_gblup_variances_all_6001_7297.rds")
saveRDS(H3_3_gblup_prediction_all, "H3_3_gblup_prediction_all_6001_7297.rds")
saveRDS(H3_3_gfblup_variances_all, "H3_3_gfblup_variances_all_6001_7297.rds")
saveRDS(H3_3_gfblup_prediction_all, "H3_3_gfblup_prediction_all_6001_7297.rds")
saveRDS(H3_3_gfblup_validate_all, "H3_3_gfblup_validate_all_6001_7297.rds")
+################################################################################################################################## diff --git a/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..d30208b9ffa73fb57f2058c31a86a0717951ed4d --- /dev/null +++ b/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_1001_2000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# Remaining input objects for this job: marker map, phenotype tables,
# GO-term counts/marker sets and the per-GO relationship matrices.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# GO-level objects; the trailing numbers are the author's recorded counts.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers

# Restores whatever objects were saved in G.Rdata into the current environment.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H4_1_gblup_variances_all=rep(list(list()),cycles) +H4_1_gblup_prediction_all=rep(list(list()),cycles) +H4_1_gfblup_variances_all=rep(list(list()),cycles) +H4_1_gfblup_prediction_all=rep(list(list()),cycles) +H4_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H4_1...") + y=1000000*pheno_df$H4_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H4_1_gblup_variances_all[[r]]<-var + H4_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
H4_1_gblup_variances_all[[r]]<-list() + H4_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H4_1_gfblup_variances_all[[r]]<-var + H4_1_gfblup_prediction_all[[r]]<-pred + H4_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H4_1_gblup_variances_all,"H4_1_gblup_variances_all_1001_2000.rds") +saveRDS(H4_1_gblup_prediction_all,"H4_1_gblup_prediction_all_1001_2000.rds") +saveRDS(H4_1_gfblup_variances_all,"H4_1_gfblup_variances_all_1001_2000.rds") +saveRDS(H4_1_gfblup_prediction_all,"H4_1_gfblup_prediction_all_1001_2000.rds") +saveRDS(H4_1_gfblup_validate_all,"H4_1_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..94fa8b58b75ab33f349498f908de090a08705544 --- /dev/null +++ b/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_1_1000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- 
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
# GO-level inputs; the trailing numbers are the author's recorded counts.
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
# Restores whatever objects were saved in G.Rdata into the current environment.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Drop GO terms without markers, then subset both GRM lists by the surviving
# GO names.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# Slice handled by this job: positions 1 to 1000 of the filtered lists.
GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]
###########################################################################################
# Run configuration: number of repeated cycles, fold count, and the fold
# assignments read from the GBLUP run's output file.
traits <- ncol(pheno_df)
cycles <- 10
#gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE);
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8
+ +H4_1_gblup_variances_all=rep(list(list()),cycles) +H4_1_gblup_prediction_all=rep(list(list()),cycles) +H4_1_gfblup_variances_all=rep(list(list()),cycles) +H4_1_gfblup_prediction_all=rep(list(list()),cycles) +H4_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H4_1...") + y=1000000*pheno_df$H4_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H4_1_gblup_variances_all[[r]]<-var + H4_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H4_1_gblup_variances_all[[r]]<-list() + H4_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + 
}) #try catch ends + } + H4_1_gfblup_variances_all[[r]]<-var + H4_1_gfblup_prediction_all[[r]]<-pred + H4_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H4_1_gblup_variances_all,"H4_1_gblup_variances_all_1_1000.rds") +saveRDS(H4_1_gblup_prediction_all,"H4_1_gblup_prediction_all_1_1000.rds") +saveRDS(H4_1_gfblup_variances_all,"H4_1_gfblup_variances_all_1_1000.rds") +saveRDS(H4_1_gfblup_prediction_all,"H4_1_gfblup_prediction_all_1_1000.rds") +saveRDS(H4_1_gfblup_validate_all,"H4_1_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..c42111a56a2000e44b054d41c916028a7725dde9 --- /dev/null +++ b/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_2001_3000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) 
#columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
# GBLUP vs GFBLUP for trait H4_1 on GO terms 2001..3000 of the filtered GO
# list. For each cycle the script fits a single-GRM model (G) and, per GO
# term, a two-GRM model (feature GRM + remainder GRM), then stores variance
# fits and cross-validated predictions as .rds files in the working directory.
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) #7432
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) #7432
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) #7432
markerSets <- readRDS(file.path(go_dir, "markerSets.rds")) #7297 excluding those go terms with zero markers
setsGF <- readRDS(file.path(go_dir, "setsGF.rds")) #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds")) #7297 excluding those go terms with zero markers
nsets <- readRDS(file.path(go_dir, "nsets.rds")) #7297 excluding those go terms with zero markers
nsnps <- readRDS(file.path(go_dir, "nsnps.rds")) #7297 excluding those go terms with zero markers
GF <- readRDS(file.path(go_dir, "GF.rds")) #7297 excluding those go terms with zero markers
rGF <- readRDS(file.path(go_dir, "rGF.rds")) #7297 excluding those go terms with zero markers
# G is used below and is not assigned anywhere else in this script, so it is
# expected to be one of the objects restored from G.Rdata.
load(file.path(data_dir, "G.Rdata"))
########################################################################################
# Keep only GO terms that have at least one marker, then take this job's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[2001:3000]
rGF_filtered <- rGF_filtered[2001:3000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Fold assignments saved by the GBLUP run are reused here instead of drawing
# new random folds.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

H4_1_gblup_variances_all <- rep(list(list()), cycles)
H4_1_gblup_prediction_all <- rep(list(list()), cycles)
H4_1_gfblup_variances_all <- rep(list(list()), cycles)
H4_1_gfblup_prediction_all <- rep(list(list()), cycles)
H4_1_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- 1000000 * pheno_df$H4_1
# NOTE(review): the formula has y on both sides; presumably an intercept-only
# design was intended -- confirm before changing.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): the stored folds are flattened and refilled row-wise;
  # whether this reproduces the GBLUP fold columns depends on how
  # gblup_validate_all.rds was written -- confirm.
  validate <- matrix(unlist(gblup_validate[[r]]),
                     round(nrow(pheno_df) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message ("H4_1...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # The braced expression runs in the script's own environment, so the
  # assignments inside it persist. On error the pre-initialised empty list for
  # this cycle is left untouched and the failure is reported.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H4_1_gblup_variances_all[[r]] <- gblup_var
    H4_1_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  }) #try catch ends
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  # seq_along() skips the loop entirely when the slice is empty.
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # A failed fit leaves the empty-list placeholder for this GO term in place.
    tryCatch({
      gfblup_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
      gfblup_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
    }) #try catch ends
  }
  H4_1_gfblup_variances_all[[r]] <- gfblup_var
  H4_1_gfblup_prediction_all[[r]] <- gfblup_pred
  H4_1_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(H4_1_gblup_variances_all, "H4_1_gblup_variances_all_2001_3000.rds")
saveRDS(H4_1_gblup_prediction_all, "H4_1_gblup_prediction_all_2001_3000.rds")
saveRDS(H4_1_gfblup_variances_all, "H4_1_gfblup_variances_all_2001_3000.rds")
saveRDS(H4_1_gfblup_prediction_all, "H4_1_gfblup_prediction_all_2001_3000.rds")
saveRDS(H4_1_gfblup_validate_all, "H4_1_gfblup_validate_all_2001_3000.rds")
+################################################################################################################################## diff --git a/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..eccb649bd18202001e0d3cc412b3a0dd6d5ddd15 --- /dev/null +++ b/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_3001_4000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# ---- Genotype / phenotype inputs (priors/data) ----
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# ---- GO-term derived inputs (priors/go) ----
# The trailing numbers are the element counts noted by the original author.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")     # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")               # 7432
# The remaining objects hold 7297 entries: GO terms with zero markers excluded.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# G.Rdata is an .Rdata image: load() restores its objects under their saved
# names (the script later refers to `G`).
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep only GO terms with at least one marker, then take this script's slice
# (terms 3001-4000) of the feature / remainder relationship matrices.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets stored by the plain GBLUP run, one entry per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
H4_1_gblup_variances_all <- rep(list(list()), cycles)
H4_1_gblup_prediction_all <- rep(list(list()), cycles)
H4_1_gfblup_variances_all <- rep(list(list()), cycles)
H4_1_gfblup_prediction_all <- rep(list(list()), cycles)
H4_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The validation matrix is rebuilt from the stored GBLUP sets. The time-seeded
  # fold sampling that used to sit here never fed into `validate`, so it was
  # removed as dead code.
  # NOTE(review): if gblup_validate[[r]] holds a column-per-fold matrix,
  # refilling it with byrow = TRUE changes fold membership -- confirm intent.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("H4_1...")
  y <- 1000000 * pheno_df$H4_1
  # NOTE(review): the response also appears on the right-hand side; presumably
  # an intercept-only design (y ~ 1) was intended -- confirm.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Results are stored only when both fits succeed. On error the pre-allocated
  # empty slots are left untouched and the failure is reported, not swallowed.
  tryCatch({
    var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H4_1_gblup_variances_all[[r]] <- var
    H4_1_gblup_prediction_all[[r]] <- pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # One slot per GO term, named after the term; failed fits keep list().
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the GO-term matrix plus its remainder matrix.
    tryCatch({
      var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
    }, error = function(e) {
      message(
        "GFBLUP failed for GO# ", k, " (", names(GF_filtered)[k], ") in cycle ", r,
        ": ", conditionMessage(e)
      )
    })
  }
  H4_1_gfblup_variances_all[[r]] <- var
  H4_1_gfblup_prediction_all[[r]] <- pred
  H4_1_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
# Written relative to the working directory set by setwd() earlier in the script.
saveRDS(H4_1_gblup_variances_all, "H4_1_gblup_variances_all_3001_4000.rds")
saveRDS(H4_1_gblup_prediction_all, "H4_1_gblup_prediction_all_3001_4000.rds")
saveRDS(H4_1_gfblup_variances_all, "H4_1_gfblup_variances_all_3001_4000.rds")
saveRDS(H4_1_gfblup_prediction_all, "H4_1_gfblup_prediction_all_3001_4000.rds")
saveRDS(H4_1_gfblup_validate_all, "H4_1_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..745f454e1b6c15e384c65074268342548adc3528
# --- /dev/null
# +++ b/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_4001_5000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negated %in%: TRUE for elements of the left operand absent from the right.
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed.
# Returns one logical per element of mypkg; looks each package up with
# system.file() rather than scanning the whole installed.packages() table.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep only GO terms with at least one marker, then take this script's slice
# (terms 4001-5000) of the feature / remainder relationship matrices.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets stored by the plain GBLUP run, one entry per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
H4_1_gblup_variances_all <- rep(list(list()), cycles)
H4_1_gblup_prediction_all <- rep(list(list()), cycles)
H4_1_gfblup_variances_all <- rep(list(list()), cycles)
H4_1_gfblup_prediction_all <- rep(list(list()), cycles)
H4_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The validation matrix is rebuilt from the stored GBLUP sets. The time-seeded
  # fold sampling that used to sit here never fed into `validate`, so it was
  # removed as dead code.
  # NOTE(review): if gblup_validate[[r]] holds a column-per-fold matrix,
  # refilling it with byrow = TRUE changes fold membership -- confirm intent.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("H4_1...")
  y <- 1000000 * pheno_df$H4_1
  # NOTE(review): the response also appears on the right-hand side; presumably
  # an intercept-only design (y ~ 1) was intended -- confirm.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Results are stored only when both fits succeed. On error the pre-allocated
  # empty slots are left untouched and the failure is reported, not swallowed.
  tryCatch({
    var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H4_1_gblup_variances_all[[r]] <- var
    H4_1_gblup_prediction_all[[r]] <- pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # One slot per GO term, named after the term; failed fits keep list().
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the GO-term matrix plus its remainder matrix.
    tryCatch({
      var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
    }, error = function(e) {
      message(
        "GFBLUP failed for GO# ", k, " (", names(GF_filtered)[k], ") in cycle ", r,
        ": ", conditionMessage(e)
      )
    })
  }
  H4_1_gfblup_variances_all[[r]] <- var
  H4_1_gfblup_prediction_all[[r]] <- pred
  H4_1_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
# Written relative to the working directory set by setwd() earlier in the script.
saveRDS(H4_1_gblup_variances_all, "H4_1_gblup_variances_all_4001_5000.rds")
saveRDS(H4_1_gblup_prediction_all, "H4_1_gblup_prediction_all_4001_5000.rds")
saveRDS(H4_1_gfblup_variances_all, "H4_1_gfblup_variances_all_4001_5000.rds")
saveRDS(H4_1_gfblup_prediction_all, "H4_1_gfblup_prediction_all_4001_5000.rds")
saveRDS(H4_1_gfblup_validate_all, "H4_1_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_5001_6000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..52ecf7f45fb20294f80d1d6471fc666de8f35c57
# --- /dev/null
# +++ b/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_5001_6000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negated %in%: TRUE for elements of the left operand absent from the right.
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed.
# Returns one logical per element of mypkg; looks each package up with
# system.file() rather than scanning the whole installed.packages() table.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep only GO terms with at least one marker, then take this script's slice
# (terms 5001-6000) of the feature / remainder relationship matrices.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets stored by the plain GBLUP run, one entry per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
H4_1_gblup_variances_all <- rep(list(list()), cycles)
H4_1_gblup_prediction_all <- rep(list(list()), cycles)
H4_1_gfblup_variances_all <- rep(list(list()), cycles)
H4_1_gfblup_prediction_all <- rep(list(list()), cycles)
H4_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The validation matrix is rebuilt from the stored GBLUP sets. The time-seeded
  # fold sampling that used to sit here never fed into `validate`, so it was
  # removed as dead code.
  # NOTE(review): if gblup_validate[[r]] holds a column-per-fold matrix,
  # refilling it with byrow = TRUE changes fold membership -- confirm intent.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("H4_1...")
  y <- 1000000 * pheno_df$H4_1
  # NOTE(review): the response also appears on the right-hand side; presumably
  # an intercept-only design (y ~ 1) was intended -- confirm.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Results are stored only when both fits succeed. On error the pre-allocated
  # empty slots are left untouched and the failure is reported, not swallowed.
  tryCatch({
    var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H4_1_gblup_variances_all[[r]] <- var
    H4_1_gblup_prediction_all[[r]] <- pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # One slot per GO term, named after the term; failed fits keep list().
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the GO-term matrix plus its remainder matrix.
    tryCatch({
      var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
    }, error = function(e) {
      message(
        "GFBLUP failed for GO# ", k, " (", names(GF_filtered)[k], ") in cycle ", r,
        ": ", conditionMessage(e)
      )
    })
  }
  H4_1_gfblup_variances_all[[r]] <- var
  H4_1_gfblup_prediction_all[[r]] <- pred
  H4_1_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
# Written relative to the current working directory.
saveRDS(H4_1_gblup_variances_all, "H4_1_gblup_variances_all_5001_6000.rds")
saveRDS(H4_1_gblup_prediction_all, "H4_1_gblup_prediction_all_5001_6000.rds")
saveRDS(H4_1_gfblup_variances_all, "H4_1_gfblup_variances_all_5001_6000.rds")
saveRDS(H4_1_gfblup_prediction_all, "H4_1_gfblup_prediction_all_5001_6000.rds")
saveRDS(H4_1_gfblup_validate_all, "H4_1_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_6001_7297.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..bf37f55d3c11048e9c0aac3e2f4be0590a477571
# --- /dev/null
# +++ b/code/using_GO/psii/H4_1_gblup_vs_gfblup_using_go_model_6001_7297.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negated %in%: TRUE for elements of the left operand absent from the right.
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed.
# Returns one logical per element of mypkg. Uses system.file() per package
# instead of scanning the full installed.packages() table, which is slow and
# which the R documentation advises against for this purpose.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
########################################################################################

# All relative output paths used later in this script resolve against this
# directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
# ---- Genotype / phenotype inputs (priors/data) ----
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# ---- GO-term derived inputs (priors/go) ----
# The trailing numbers are the element counts noted by the original author.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")     # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")               # 7432
# The remaining objects hold 7297 entries: GO terms with zero markers excluded.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# G.Rdata is an .Rdata image: load() restores its objects under their saved
# names (the script later refers to `G`).
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep only GO terms with at least one marker, then take this script's slice
# (term 6001 through the last retained term, n) of the feature / remainder
# relationship matrices.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[6001:n]
rGF_filtered <- rGF_filtered[6001:n]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets stored by the plain GBLUP run, one entry per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
H4_1_gblup_variances_all <- rep(list(list()), cycles)
H4_1_gblup_prediction_all <- rep(list(list()), cycles)
H4_1_gfblup_variances_all <- rep(list(list()), cycles)
H4_1_gfblup_prediction_all <- rep(list(list()), cycles)
H4_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The validation matrix is rebuilt from the stored GBLUP sets. The time-seeded
  # fold sampling that used to sit here never fed into `validate`, so it was
  # removed as dead code.
  # NOTE(review): if gblup_validate[[r]] holds a column-per-fold matrix,
  # refilling it with byrow = TRUE changes fold membership -- confirm intent.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("H4_1...")
  y <- 1000000 * pheno_df$H4_1
  # NOTE(review): the response also appears on the right-hand side; presumably
  # an intercept-only design (y ~ 1) was intended -- confirm.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Results are stored only when both fits succeed. On error the pre-allocated
  # empty slots are left untouched and the failure is reported, not swallowed.
  tryCatch({
    var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H4_1_gblup_variances_all[[r]] <- var
    H4_1_gblup_prediction_all[[r]] <- pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # One slot per GO term, named after the term; failed fits keep list().
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the GO-term matrix plus its remainder matrix.
    tryCatch({
      var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
    }, error = function(e) {
      message(
        "GFBLUP failed for GO# ", k, " (", names(GF_filtered)[k], ") in cycle ", r,
        ": ", conditionMessage(e)
      )
    })
  }
  H4_1_gfblup_variances_all[[r]] <- var
  H4_1_gfblup_prediction_all[[r]] <- pred
  H4_1_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
# Written relative to the working directory set by setwd() earlier in the script.
saveRDS(H4_1_gblup_variances_all, "H4_1_gblup_variances_all_6001_7297.rds")
saveRDS(H4_1_gblup_prediction_all, "H4_1_gblup_prediction_all_6001_7297.rds")
saveRDS(H4_1_gfblup_variances_all, "H4_1_gfblup_variances_all_6001_7297.rds")
saveRDS(H4_1_gfblup_prediction_all, "H4_1_gfblup_prediction_all_6001_7297.rds")
saveRDS(H4_1_gfblup_validate_all, "H4_1_gfblup_validate_all_6001_7297.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_1001_2000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..1b02146701c48e871dcff215fef342e881c93307
# --- /dev/null
# +++ b/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_1001_2000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negated %in%: TRUE for elements of the left operand absent from the right.
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed.
# Returns one logical per element of mypkg; looks each package up with
# system.file() rather than scanning the whole installed.packages() table.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep only GO terms with at least one marker, then take this script's slice
# (terms 1001-2000) of the feature / remainder relationship matrices.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[1001:2000]
rGF_filtered <- rGF_filtered[1001:2000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets stored by the plain GBLUP run, one entry per cycle.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
H4_2_gblup_variances_all <- rep(list(list()), cycles)
H4_2_gblup_prediction_all <- rep(list(list()), cycles)
H4_2_gfblup_variances_all <- rep(list(list()), cycles)
H4_2_gfblup_prediction_all <- rep(list(list()), cycles)
H4_2_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # The validation matrix is rebuilt from the stored GBLUP sets. The time-seeded
  # fold sampling that used to sit here never fed into `validate`, so it was
  # removed as dead code.
  # NOTE(review): if gblup_validate[[r]] holds a column-per-fold matrix,
  # refilling it with byrow = TRUE changes fold membership -- confirm intent.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("H4_2...")
  y <- 1000000 * pheno_df$H4_2
  # NOTE(review): the response also appears on the right-hand side; presumably
  # an intercept-only design (y ~ 1) was intended -- confirm.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Results are stored only when both fits succeed. On error the pre-allocated
  # empty slots are left untouched and the failure is reported, not swallowed.
  tryCatch({
    var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H4_2_gblup_variances_all[[r]] <- var
    H4_2_gblup_prediction_all[[r]] <- pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # One slot per GO term, named after the term; failed fits keep list().
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the GO-term matrix plus its remainder matrix.
    tryCatch({
      var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
      pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
    }, error = function(e) {
      message(
        "GFBLUP failed for GO# ", k, " (", names(GF_filtered)[k], ") in cycle ", r,
        ": ", conditionMessage(e)
      )
    })
  }
  H4_2_gfblup_variances_all[[r]] <- var
  H4_2_gfblup_prediction_all[[r]] <- pred
  H4_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
# Written relative to the working directory set by setwd() earlier in the script.
saveRDS(H4_2_gblup_variances_all, "H4_2_gblup_variances_all_1001_2000.rds")
saveRDS(H4_2_gblup_prediction_all, "H4_2_gblup_prediction_all_1001_2000.rds")
saveRDS(H4_2_gfblup_variances_all, "H4_2_gfblup_variances_all_1001_2000.rds")
saveRDS(H4_2_gfblup_prediction_all, "H4_2_gfblup_prediction_all_1001_2000.rds")
saveRDS(H4_2_gfblup_validate_all, "H4_2_gfblup_validate_all_1001_2000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_1_1000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..86c03f8144c92e1336fa293fcadc6b604bc3af59
# --- /dev/null
# +++ b/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_1_1000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)

# Negated %in%: TRUE for elements of the left operand absent from the right.
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed.
# Returns one logical per element of mypkg; looks each package up with
# system.file() rather than scanning the whole installed.packages() table.
is.installed <- function(mypkg) {
  vapply(
    mypkg,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+###########################################################################################
+message("Loading data...")
+# Every input lives under one of two directories; build the paths from them.
+data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
+go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
+# NOTE(review): only pheno_df, go_all_markers_number, go_all_markers, GF, rGF
+# and G are referenced later in this script; the other objects are still
+# loaded so the workspace matches the original run.
+MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
+accessions <- readRDS(file.path(data_dir, "accessions.rds"))
+all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
+ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
+Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
+geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
+MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
+pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
+W <- readRDS(file.path(data_dir, "W.rds"))
+go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) # 7432
+go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) # 7432
+go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) # 7432
+# The objects below: 7297, excluding those GO terms with zero markers.
+markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
+setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
+rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
+nsets <- readRDS(file.path(go_dir, "nsets.rds"))
+nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
+GF <- readRDS(file.path(go_dir, "GF.rds"))
+rGF <- readRDS(file.path(go_dir, "rGF.rds"))
+# Presumably provides `G`, which is passed to greml() as the GRM below -- confirm.
+load(file.path(data_dir, "G.Rdata"))
+########################################################################################
+# Keep the GO terms whose marker count is above zero, then take this job's slice.
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+
+GF_filtered <- GF_filtered[1:1000]
+rGF_filtered <- rGF_filtered[1:1000]
+###########################################################################################
+cycles <- 10
+n_folds <- 8
+# The validation sets are read from the GBLUP output instead of being sampled
+# here, so the unused time-seeded fold sampling of the original was dropped.
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
+
+H4_2_gblup_variances_all <- rep(list(list()), cycles)
+H4_2_gblup_prediction_all <- rep(list(list()), cycles)
+H4_2_gfblup_variances_all <- rep(list(list()), cycles)
+H4_2_gfblup_prediction_all <- rep(list(list()), cycles)
+H4_2_gfblup_validate_all <- rep(list(list()), cycles)
+
+# The response and the fixed-effect design do not depend on the cycle.
+y <- 1000000 * pheno_df$H4_2
+# NOTE(review): `y ~ 1*y` puts the response on the right-hand side; an
+# intercept-only design (`y ~ 1`) was presumably intended -- confirm.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message("H4_2...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits are returned from tryCatch() and stored only when both succeed.
+  # Assignments made inside an error handler would only change the handler's
+  # own local copies, so the empty-list default is kept by not assigning.
+  gblup_fit <- tryCatch(
+    list(
+      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    ),
+    error = function(e) {
+      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+      NULL
+    }
+  )
+  if (!is.null(gblup_fit)) {
+    H4_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+    H4_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  }
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm <- c(GF_filtered[k], rGF_filtered[k])
+    # As for GBLUP: a failure in either fit leaves both entries as empty
+    # lists, which is what the original handler tried (and failed) to do.
+    gfblup_fit <- tryCatch(
+      list(
+        var = greml(y = y, X = X, GRM = grm, ncores = 1),
+        pred = greml(y = y, X = X, GRM = grm, validate = validate, ncores = 1)
+      ),
+      error = function(e) {
+        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+        NULL
+      }
+    )
+    if (!is.null(gfblup_fit)) {
+      gfblup_var[[k]] <- gfblup_fit[["var"]]
+      gfblup_pred[[k]] <- gfblup_fit[["pred"]]
+    }
+  }
+  H4_2_gfblup_variances_all[[r]] <- gfblup_var
+  H4_2_gfblup_prediction_all[[r]] <- gfblup_pred
+  H4_2_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(H4_2_gblup_variances_all, "H4_2_gblup_variances_all_1_1000.rds")
+saveRDS(H4_2_gblup_prediction_all, "H4_2_gblup_prediction_all_1_1000.rds")
+saveRDS(H4_2_gfblup_variances_all, "H4_2_gfblup_variances_all_1_1000.rds")
+saveRDS(H4_2_gfblup_prediction_all, "H4_2_gfblup_prediction_all_1_1000.rds")
+saveRDS(H4_2_gfblup_validate_all, "H4_2_gfblup_validate_all_1_1000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_2001_3000.R
new file mode 100644
index 0000000000000000000000000000000000000000..bda5e2553ae7605a3bfac53d6d25c5b2bcc906d5
--- /dev/null
+++ b/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_2001_3000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+# Strip leading and trailing whitespace from each element of `x`.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negated form of `%in%`, built with purrr::negate().
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether `mypkg` appears in the first column of installed.packages().
+is.installed <- function(mypkg) {
+  installed_names <- installed.packages()[, 1]
+  is.element(mypkg, installed_names)
+}
+########################################################################################
+
+# The saveRDS() calls at the end of the script use relative file names, so
+# their output lands in this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")
+
+# Dependency installation, left disabled:
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+# Remaining inputs from the priors/data directory.
+ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
+geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
+MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
+W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+# Inputs from the priors/go directory; the first three are annotated 7432.
+go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
+go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
+go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
+# The next seven are annotated 7297, excluding those GO terms with zero markers.
+markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
+setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
+rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
+nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
+nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
+GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
+rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
+# Presumably provides `G`, which the script later passes to greml() -- confirm.
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+########################################################################################
+# Keep the GO terms whose marker count is above zero, then take this job's slice.
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+
+GF_filtered <- GF_filtered[2001:3000]
+rGF_filtered <- rGF_filtered[2001:3000]
+###########################################################################################
+cycles <- 10
+n_folds <- 8
+# The validation sets are read from the GBLUP output instead of being sampled
+# here, so the unused time-seeded fold sampling of the original was dropped.
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
+
+H4_2_gblup_variances_all <- rep(list(list()), cycles)
+H4_2_gblup_prediction_all <- rep(list(list()), cycles)
+H4_2_gfblup_variances_all <- rep(list(list()), cycles)
+H4_2_gfblup_prediction_all <- rep(list(list()), cycles)
+H4_2_gfblup_validate_all <- rep(list(list()), cycles)
+
+# The response and the fixed-effect design do not depend on the cycle.
+y <- 1000000 * pheno_df$H4_2
+# NOTE(review): `y ~ 1*y` puts the response on the right-hand side; an
+# intercept-only design (`y ~ 1`) was presumably intended -- confirm.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message("H4_2...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits are returned from tryCatch() and stored only when both succeed.
+  # Assignments made inside an error handler would only change the handler's
+  # own local copies, so the empty-list default is kept by not assigning.
+  gblup_fit <- tryCatch(
+    list(
+      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    ),
+    error = function(e) {
+      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+      NULL
+    }
+  )
+  if (!is.null(gblup_fit)) {
+    H4_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+    H4_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  }
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm <- c(GF_filtered[k], rGF_filtered[k])
+    # As for GBLUP: a failure in either fit leaves both entries as empty
+    # lists, which is what the original handler tried (and failed) to do.
+    gfblup_fit <- tryCatch(
+      list(
+        var = greml(y = y, X = X, GRM = grm, ncores = 1),
+        pred = greml(y = y, X = X, GRM = grm, validate = validate, ncores = 1)
+      ),
+      error = function(e) {
+        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+        NULL
+      }
+    )
+    if (!is.null(gfblup_fit)) {
+      gfblup_var[[k]] <- gfblup_fit[["var"]]
+      gfblup_pred[[k]] <- gfblup_fit[["pred"]]
+    }
+  }
+  H4_2_gfblup_variances_all[[r]] <- gfblup_var
+  H4_2_gfblup_prediction_all[[r]] <- gfblup_pred
+  H4_2_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(H4_2_gblup_variances_all, "H4_2_gblup_variances_all_2001_3000.rds")
+saveRDS(H4_2_gblup_prediction_all, "H4_2_gblup_prediction_all_2001_3000.rds")
+saveRDS(H4_2_gfblup_variances_all, "H4_2_gfblup_variances_all_2001_3000.rds")
+saveRDS(H4_2_gfblup_prediction_all, "H4_2_gfblup_prediction_all_2001_3000.rds")
+saveRDS(H4_2_gfblup_validate_all, "H4_2_gfblup_validate_all_2001_3000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_3001_4000.R
new file mode 100644
index 0000000000000000000000000000000000000000..3db88cef356b8b4ce840abe0a0c20efbc283cd11
--- /dev/null
+++ b/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_3001_4000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+# Strip leading and trailing whitespace from each element of `x`.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negated form of `%in%`, built with purrr::negate().
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether `mypkg` appears in the first column of installed.packages().
+is.installed <- function(mypkg) {
+  is.element(mypkg, installed.packages()[, 1])
+}
+########################################################################################
+
+# The saveRDS() calls at the end of the script use relative file names, so
+# their output lands in this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")
+
+# Dependency installation, left disabled:
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+###########################################################################################
+message("Loading data...")
+# Every input lives under one of two directories; build the paths from them.
+data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
+go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
+# NOTE(review): only pheno_df, go_all_markers_number, go_all_markers, GF, rGF
+# and G are referenced later in this script; the other objects are still
+# loaded so the workspace matches the original run.
+MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
+accessions <- readRDS(file.path(data_dir, "accessions.rds"))
+all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
+ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
+Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
+geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
+MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
+pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
+W <- readRDS(file.path(data_dir, "W.rds"))
+go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) # 7432
+go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) # 7432
+go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) # 7432
+# The objects below: 7297, excluding those GO terms with zero markers.
+markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
+setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
+rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
+nsets <- readRDS(file.path(go_dir, "nsets.rds"))
+nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
+GF <- readRDS(file.path(go_dir, "GF.rds"))
+rGF <- readRDS(file.path(go_dir, "rGF.rds"))
+# Presumably provides `G`, which is passed to greml() as the GRM below -- confirm.
+load(file.path(data_dir, "G.Rdata"))
+########################################################################################
+# Keep the GO terms whose marker count is above zero, then take this job's slice.
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+
+GF_filtered <- GF_filtered[3001:4000]
+rGF_filtered <- rGF_filtered[3001:4000]
+###########################################################################################
+cycles <- 10
+n_folds <- 8
+# The validation sets are read from the GBLUP output instead of being sampled
+# here, so the unused time-seeded fold sampling of the original was dropped.
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
+
+H4_2_gblup_variances_all <- rep(list(list()), cycles)
+H4_2_gblup_prediction_all <- rep(list(list()), cycles)
+H4_2_gfblup_variances_all <- rep(list(list()), cycles)
+H4_2_gfblup_prediction_all <- rep(list(list()), cycles)
+H4_2_gfblup_validate_all <- rep(list(list()), cycles)
+
+# The response and the fixed-effect design do not depend on the cycle.
+y <- 1000000 * pheno_df$H4_2
+# NOTE(review): `y ~ 1*y` puts the response on the right-hand side; an
+# intercept-only design (`y ~ 1`) was presumably intended -- confirm.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message("H4_2...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits are returned from tryCatch() and stored only when both succeed.
+  # Assignments made inside an error handler would only change the handler's
+  # own local copies, so the empty-list default is kept by not assigning.
+  gblup_fit <- tryCatch(
+    list(
+      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    ),
+    error = function(e) {
+      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+      NULL
+    }
+  )
+  if (!is.null(gblup_fit)) {
+    H4_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+    H4_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  }
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm <- c(GF_filtered[k], rGF_filtered[k])
+    # As for GBLUP: a failure in either fit leaves both entries as empty
+    # lists, which is what the original handler tried (and failed) to do.
+    gfblup_fit <- tryCatch(
+      list(
+        var = greml(y = y, X = X, GRM = grm, ncores = 1),
+        pred = greml(y = y, X = X, GRM = grm, validate = validate, ncores = 1)
+      ),
+      error = function(e) {
+        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+        NULL
+      }
+    )
+    if (!is.null(gfblup_fit)) {
+      gfblup_var[[k]] <- gfblup_fit[["var"]]
+      gfblup_pred[[k]] <- gfblup_fit[["pred"]]
+    }
+  }
+  H4_2_gfblup_variances_all[[r]] <- gfblup_var
+  H4_2_gfblup_prediction_all[[r]] <- gfblup_pred
+  H4_2_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(H4_2_gblup_variances_all, "H4_2_gblup_variances_all_3001_4000.rds")
+saveRDS(H4_2_gblup_prediction_all, "H4_2_gblup_prediction_all_3001_4000.rds")
+saveRDS(H4_2_gfblup_variances_all, "H4_2_gfblup_variances_all_3001_4000.rds")
+saveRDS(H4_2_gfblup_prediction_all, "H4_2_gfblup_prediction_all_3001_4000.rds")
+saveRDS(H4_2_gfblup_validate_all, "H4_2_gfblup_validate_all_3001_4000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_4001_5000.R
new file mode 100644
index 0000000000000000000000000000000000000000..8cf096dfbf83e17c746e4a40c286a343ec047864
--- /dev/null
+++ b/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_4001_5000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+# Strip leading and trailing whitespace from each element of `x`.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negated form of `%in%`, built with purrr::negate().
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether `mypkg` appears in the first column of installed.packages().
+is.installed <- function(mypkg) {
+  is.element(mypkg, installed.packages()[, 1])
+}
+########################################################################################
+
+# The saveRDS() calls at the end of the script use relative file names, so
+# their output lands in this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")
+
+# Dependency installation, left disabled:
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+###########################################################################################
+message("Loading data...")
+# Every input lives under one of two directories; build the paths from them.
+data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
+go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"
+# NOTE(review): only pheno_df, go_all_markers_number, go_all_markers, GF, rGF
+# and G are referenced later in this script; the other objects are still
+# loaded so the workspace matches the original run.
+MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
+accessions <- readRDS(file.path(data_dir, "accessions.rds"))
+all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
+ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
+Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
+geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
+MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
+pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
+W <- readRDS(file.path(data_dir, "W.rds"))
+go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds")) # 7432
+go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds")) # 7432
+go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds")) # 7432
+# The objects below: 7297, excluding those GO terms with zero markers.
+markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
+setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
+rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
+nsets <- readRDS(file.path(go_dir, "nsets.rds"))
+nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
+GF <- readRDS(file.path(go_dir, "GF.rds"))
+rGF <- readRDS(file.path(go_dir, "rGF.rds"))
+# Presumably provides `G`, which is passed to greml() as the GRM below -- confirm.
+load(file.path(data_dir, "G.Rdata"))
+########################################################################################
+# Keep the GO terms whose marker count is above zero, then take this job's slice.
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+
+GF_filtered <- GF_filtered[4001:5000]
+rGF_filtered <- rGF_filtered[4001:5000]
+###########################################################################################
+cycles <- 10
+n_folds <- 8
+# The validation sets are read from the GBLUP output instead of being sampled
+# here, so the unused time-seeded fold sampling of the original was dropped.
+gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
+
+H4_2_gblup_variances_all <- rep(list(list()), cycles)
+H4_2_gblup_prediction_all <- rep(list(list()), cycles)
+H4_2_gfblup_variances_all <- rep(list(list()), cycles)
+H4_2_gfblup_prediction_all <- rep(list(list()), cycles)
+H4_2_gfblup_validate_all <- rep(list(list()), cycles)
+
+# The response and the fixed-effect design do not depend on the cycle.
+y <- 1000000 * pheno_df$H4_2
+# NOTE(review): `y ~ 1*y` puts the response on the right-hand side; an
+# intercept-only design (`y ~ 1`) was presumably intended -- confirm.
+fm <- y ~ 1*y
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message("H4_2...")
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Both fits are returned from tryCatch() and stored only when both succeed.
+  # Assignments made inside an error handler would only change the handler's
+  # own local copies, so the empty-list default is kept by not assigning.
+  gblup_fit <- tryCatch(
+    list(
+      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
+      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    ),
+    error = function(e) {
+      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+      NULL
+    }
+  )
+  if (!is.null(gblup_fit)) {
+    H4_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
+    H4_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
+  }
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    grm <- c(GF_filtered[k], rGF_filtered[k])
+    # As for GBLUP: a failure in either fit leaves both entries as empty
+    # lists, which is what the original handler tried (and failed) to do.
+    gfblup_fit <- tryCatch(
+      list(
+        var = greml(y = y, X = X, GRM = grm, ncores = 1),
+        pred = greml(y = y, X = X, GRM = grm, validate = validate, ncores = 1)
+      ),
+      error = function(e) {
+        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+        NULL
+      }
+    )
+    if (!is.null(gfblup_fit)) {
+      gfblup_var[[k]] <- gfblup_fit[["var"]]
+      gfblup_pred[[k]] <- gfblup_fit[["pred"]]
+    }
+  }
+  H4_2_gfblup_variances_all[[r]] <- gfblup_var
+  H4_2_gfblup_prediction_all[[r]] <- gfblup_pred
+  H4_2_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(H4_2_gblup_variances_all, "H4_2_gblup_variances_all_4001_5000.rds")
+saveRDS(H4_2_gblup_prediction_all, "H4_2_gblup_prediction_all_4001_5000.rds")
+saveRDS(H4_2_gfblup_variances_all, "H4_2_gfblup_variances_all_4001_5000.rds")
+saveRDS(H4_2_gfblup_prediction_all, "H4_2_gfblup_prediction_all_4001_5000.rds")
+saveRDS(H4_2_gfblup_validate_all, "H4_2_gfblup_validate_all_4001_5000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_5001_6000.R
new file mode 100644
index 0000000000000000000000000000000000000000..73b65b01a2cb7d577f36f140fe3cf5b091e5a435
--- /dev/null
+++ b/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_5001_6000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+# Strip leading and trailing whitespace from each element of `x`.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negated form of `%in%`, built with purrr::negate().
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether `mypkg` appears in the first column of installed.packages().
+is.installed <- function(mypkg) {
+  installed_names <- installed.packages()[, 1]
+  is.element(mypkg, installed_names)
+}
+########################################################################################
+
+# The saveRDS() calls at the end of the script use relative file names, so
+# their output lands in this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")
+
+# Dependency installation, left disabled:
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+# Remaining inputs from the priors/data directory.
+ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
+geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
+MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
+W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+# Inputs from the priors/go directory; the first three are annotated 7432.
+go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
+go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
+go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
+# The next seven are annotated 7297, excluding those GO terms with zero markers.
+markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
+setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
+rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
+nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
+nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
+GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
+rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
+# Presumably provides `G`, which the script later passes to greml() -- confirm.
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+# ----------------------- Select the GO terms to test -----------------------
+# Keep only GO terms that have at least one marker, then take this job's slice.
+has_markers <- go_all_markers_number > 0
+n <- length(go_all_genes_number[has_markers])
+go_all_markers_filtered <- go_all_markers[has_markers]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+
+GF_filtered <- GF_filtered[5001:6000]
+rGF_filtered <- rGF_filtered[5001:6000]
+
+# ------------------------------ Run settings ------------------------------
+traits <- ncol(pheno_df)
+cycles <- 10
+# Fold assignments are read from the GBLUP run's output instead of being drawn
+# again, so no random seed or fold sampling is needed in this script.
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# One slot per cycle; a slot stays an empty list when the fit for it fails.
+H4_2_gblup_variances_all <- rep(list(list()), cycles)
+H4_2_gblup_prediction_all <- rep(list(list()), cycles)
+H4_2_gfblup_variances_all <- rep(list(list()), cycles)
+H4_2_gfblup_prediction_all <- rep(list(list()), cycles)
+H4_2_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Run greml() twice for one set of GRMs: once without and once with the
+# validation folds. Returns list(var = , pred = ). If either call fails, the
+# reason is reported with message() and both entries are empty lists, so a
+# failed model never keeps a half-finished result.
+fit_greml_pair <- function(y, X, GRM, validate, label) {
+  tryCatch({
+    list(
+      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
+      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
+    )
+  }, error = function(e) {
+    message(label, " failed: ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+}
+
+# The response and design matrix do not depend on the cycle, so build them once.
+y <- 1000000 * pheno_df$H4_2
+fm <- y ~ 1*y  # NOTE(review): presumably meant as an intercept-only model; confirm
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # NOTE(review): byrow=TRUE refills the flattened fold indices row-wise; whether
+  # that reproduces the GBLUP folds depends on how gblup_validate_all.rds was
+  # written; confirm against the script that saved it.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df)/n_folds), byrow=TRUE)
+  message ("H4_2...")
+
+  # GBLUP: single GRM G
+  gblup_fit <- fit_greml_pair(y, X, list(G=G), validate, "GBLUP")
+  H4_2_gblup_variances_all[[r]] <- gblup_fit$var
+  H4_2_gblup_prediction_all[[r]] <- gblup_fit$pred
+
+  # GFBLUP: one model per GO term, pairing GF_filtered[k] with rGF_filtered[k]
+  gf_variances <- rep(list(list()), length(GF_filtered))
+  gf_predictions <- rep(list(list()), length(GF_filtered))
+  names(gf_variances) <- names(GF_filtered)
+  names(gf_predictions) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate, paste("GFBLUP GO#", k))
+    gf_variances[[k]] <- gf_fit$var
+    gf_predictions[[k]] <- gf_fit$pred
+  }
+  H4_2_gfblup_variances_all[[r]] <- gf_variances
+  H4_2_gfblup_prediction_all[[r]] <- gf_predictions
+  H4_2_gfblup_validate_all[[r]] <- list(validate)
+}
+saveRDS(H4_2_gblup_variances_all,"H4_2_gblup_variances_all_5001_6000.rds")
+saveRDS(H4_2_gblup_prediction_all,"H4_2_gblup_prediction_all_5001_6000.rds")
+saveRDS(H4_2_gfblup_variances_all,"H4_2_gfblup_variances_all_5001_6000.rds")
+saveRDS(H4_2_gfblup_prediction_all,"H4_2_gfblup_prediction_all_5001_6000.rds")
+saveRDS(H4_2_gfblup_validate_all,"H4_2_gfblup_validate_all_5001_6000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_6001_7297.R
new file mode 100644
index 0000000000000000000000000000000000000000..2f25999c45631134e7e53e0d55fc7fc8f9d9fb00
--- /dev/null
+++ b/code/using_GO/psii/H4_2_gblup_vs_gfblup_using_go_model_6001_7297.R
@@ -0,0 +1,130 @@
+# ---------------------------- Helper functions ----------------------------
+# Strip leading and trailing whitespace from each element of x.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negated form of %in%.
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether the package named by mypkg appears among the installed packages.
+is.installed <- function(mypkg) {
+  pkg_names <- installed.packages()[, 1]
+  is.element(mypkg, pkg_names)
+}
+
+# Relative output paths used by the saveRDS() calls resolve against this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")
+
+# Packages are expected to be installed already. The disabled bootstrap that
+# used to sit here installed backports and devtools with install.packages()
+# and qgg with devtools::install_github("psoerensen/qgg") after setting
+# options(devtools.install.args=" --no-multiargs").
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+
+# ------------------------------- Input data -------------------------------
+message("Loading data...")
+MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
+geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
+MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
+W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")  # 7432
+go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")  # 7432
+go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")  # 7432
+markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")  # 7297 excluding those go terms with zero markers
+setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")  # 7297 excluding those go terms with zero markers
+rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")  # 7297 excluding those go terms with zero markers
+nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")  # 7297 excluding those go terms with zero markers
+nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")  # 7297 excluding those go terms with zero markers
+GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")  # 7297 excluding those go terms with zero markers
+rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")  # 7297 excluding those go terms with zero markers
+# load() restores the objects stored in G.Rdata; the script later uses G from it.
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+
+# ----------------------- Select the GO terms to test -----------------------
+# Only GO terms with at least one marker are kept; this job takes entries
+# 6001 through n of that filtered list.
+has_markers <- go_all_markers_number > 0
+n <- length(go_all_genes_number[has_markers])
+go_all_markers_filtered <- go_all_markers[has_markers]
+kept_terms <- names(go_all_markers_filtered)
+GF_filtered <- GF[kept_terms]
+rGF_filtered <- rGF[kept_terms]
+
+GF_filtered <- GF_filtered[6001:n]
+rGF_filtered <- rGF_filtered[6001:n]
+
+# ------------------------------ Run settings ------------------------------
+traits <- ncol(pheno_df)
+cycles <- 10
+# Fold assignments written by the GBLUP run; indexed per cycle further down.
+gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# One slot per cycle; a slot stays an empty list when the fit for it fails.
+H4_2_gblup_variances_all <- rep(list(list()), cycles)
+H4_2_gblup_prediction_all <- rep(list(list()), cycles)
+H4_2_gfblup_variances_all <- rep(list(list()), cycles)
+H4_2_gfblup_prediction_all <- rep(list(list()), cycles)
+H4_2_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Run greml() twice for one set of GRMs: once without and once with the
+# validation folds. Returns list(var = , pred = ). If either call fails, the
+# reason is reported with message() and both entries are empty lists, so a
+# failed model never keeps a half-finished result.
+fit_greml_pair <- function(y, X, GRM, validate, label) {
+  tryCatch({
+    list(
+      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
+      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
+    )
+  }, error = function(e) {
+    message(label, " failed: ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+}
+
+# The response and design matrix do not depend on the cycle, so build them once.
+y <- 1000000 * pheno_df$H4_2
+fm <- y ~ 1*y  # NOTE(review): presumably meant as an intercept-only model; confirm
+X <- model.matrix(fm)
+
+# Folds come from gblup_validate, so no seed or fold sampling is done here.
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # NOTE(review): byrow=TRUE refills the flattened fold indices row-wise; whether
+  # that reproduces the GBLUP folds depends on how gblup_validate_all.rds was
+  # written; confirm against the script that saved it.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df)/n_folds), byrow=TRUE)
+  message ("H4_2...")
+
+  # GBLUP: single GRM G
+  gblup_fit <- fit_greml_pair(y, X, list(G=G), validate, "GBLUP")
+  H4_2_gblup_variances_all[[r]] <- gblup_fit$var
+  H4_2_gblup_prediction_all[[r]] <- gblup_fit$pred
+
+  # GFBLUP: one model per GO term, pairing GF_filtered[k] with rGF_filtered[k]
+  gf_variances <- rep(list(list()), length(GF_filtered))
+  gf_predictions <- rep(list(list()), length(GF_filtered))
+  names(gf_variances) <- names(GF_filtered)
+  names(gf_predictions) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate, paste("GFBLUP GO#", k))
+    gf_variances[[k]] <- gf_fit$var
+    gf_predictions[[k]] <- gf_fit$pred
+  }
+  H4_2_gfblup_variances_all[[r]] <- gf_variances
+  H4_2_gfblup_prediction_all[[r]] <- gf_predictions
+  H4_2_gfblup_validate_all[[r]] <- list(validate)
+}
+saveRDS(H4_2_gblup_variances_all,"H4_2_gblup_variances_all_6001_7297.rds")
+saveRDS(H4_2_gblup_prediction_all,"H4_2_gblup_prediction_all_6001_7297.rds")
+saveRDS(H4_2_gfblup_variances_all,"H4_2_gfblup_variances_all_6001_7297.rds")
+saveRDS(H4_2_gfblup_prediction_all,"H4_2_gfblup_prediction_all_6001_7297.rds")
+saveRDS(H4_2_gfblup_validate_all,"H4_2_gfblup_validate_all_6001_7297.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_1001_2000.R
new file mode 100644
index 0000000000000000000000000000000000000000..4ec0b1c6e65cab68afc854661f001ac82d0b55fb
--- /dev/null
+++ b/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_1001_2000.R
@@ -0,0 +1,130 @@
+# ---------------------------- Helper functions ----------------------------
+# Strip leading and trailing whitespace from each element of x.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negated form of %in%.
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether the package named by mypkg appears among the installed packages.
+is.installed <- function(mypkg) {
+  pkg_names <- installed.packages()[, 1]
+  is.element(mypkg, pkg_names)
+}
+
+# Relative output paths used by the saveRDS() calls resolve against this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")
+
+# Packages are expected to be installed already. The disabled bootstrap that
+# used to sit here installed backports and devtools with install.packages()
+# and qgg with devtools::install_github("psoerensen/qgg") after setting
+# options(devtools.install.args=" --no-multiargs").
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+
+# ------------------------------- Input data -------------------------------
+message("Loading data...")
+MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
+geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
+MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
+W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")  # 7432
+go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")  # 7432
+go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")  # 7432
+markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")  # 7297 excluding those go terms with zero markers
+setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")  # 7297 excluding those go terms with zero markers
+rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")  # 7297 excluding those go terms with zero markers
+nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")  # 7297 excluding those go terms with zero markers
+nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")  # 7297 excluding those go terms with zero markers
+GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")  # 7297 excluding those go terms with zero markers
+rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")  # 7297 excluding those go terms with zero markers
+# load() restores the objects stored in G.Rdata; the script later uses G from it.
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+
+# ----------------------- Select the GO terms to test -----------------------
+# Keep only GO terms that have at least one marker, then take this job's slice.
+has_markers <- go_all_markers_number > 0
+n <- length(go_all_genes_number[has_markers])
+go_all_markers_filtered <- go_all_markers[has_markers]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+
+GF_filtered <- GF_filtered[1001:2000]
+rGF_filtered <- rGF_filtered[1001:2000]
+
+# ------------------------------ Run settings ------------------------------
+traits <- ncol(pheno_df)
+cycles <- 10
+# Fold assignments are read from the GBLUP run's output instead of being drawn
+# again, so no random seed or fold sampling is needed in this script.
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# One slot per cycle; a slot stays an empty list when the fit for it fails.
+H4_3_gblup_variances_all <- rep(list(list()), cycles)
+H4_3_gblup_prediction_all <- rep(list(list()), cycles)
+H4_3_gfblup_variances_all <- rep(list(list()), cycles)
+H4_3_gfblup_prediction_all <- rep(list(list()), cycles)
+H4_3_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Run greml() twice for one set of GRMs: once without and once with the
+# validation folds. Returns list(var = , pred = ). If either call fails, the
+# reason is reported with message() and both entries are empty lists, so a
+# failed model never keeps a half-finished result.
+fit_greml_pair <- function(y, X, GRM, validate, label) {
+  tryCatch({
+    list(
+      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
+      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
+    )
+  }, error = function(e) {
+    message(label, " failed: ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+}
+
+# The response and design matrix do not depend on the cycle, so build them once.
+y <- 1000000 * pheno_df$H4_3
+fm <- y ~ 1*y  # NOTE(review): presumably meant as an intercept-only model; confirm
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # NOTE(review): byrow=TRUE refills the flattened fold indices row-wise; whether
+  # that reproduces the GBLUP folds depends on how gblup_validate_all.rds was
+  # written; confirm against the script that saved it.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df)/n_folds), byrow=TRUE)
+  message ("H4_3...")
+
+  # GBLUP: single GRM G
+  gblup_fit <- fit_greml_pair(y, X, list(G=G), validate, "GBLUP")
+  H4_3_gblup_variances_all[[r]] <- gblup_fit$var
+  H4_3_gblup_prediction_all[[r]] <- gblup_fit$pred
+
+  # GFBLUP: one model per GO term, pairing GF_filtered[k] with rGF_filtered[k]
+  gf_variances <- rep(list(list()), length(GF_filtered))
+  gf_predictions <- rep(list(list()), length(GF_filtered))
+  names(gf_variances) <- names(GF_filtered)
+  names(gf_predictions) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate, paste("GFBLUP GO#", k))
+    gf_variances[[k]] <- gf_fit$var
+    gf_predictions[[k]] <- gf_fit$pred
+  }
+  H4_3_gfblup_variances_all[[r]] <- gf_variances
+  H4_3_gfblup_prediction_all[[r]] <- gf_predictions
+  H4_3_gfblup_validate_all[[r]] <- list(validate)
+}
+saveRDS(H4_3_gblup_variances_all,"H4_3_gblup_variances_all_1001_2000.rds")
+saveRDS(H4_3_gblup_prediction_all,"H4_3_gblup_prediction_all_1001_2000.rds")
+saveRDS(H4_3_gfblup_variances_all,"H4_3_gfblup_variances_all_1001_2000.rds")
+saveRDS(H4_3_gfblup_prediction_all,"H4_3_gfblup_prediction_all_1001_2000.rds")
+saveRDS(H4_3_gfblup_validate_all,"H4_3_gfblup_validate_all_1001_2000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_1_1000.R
new file mode 100644
index 0000000000000000000000000000000000000000..303a258c94e337c0b8ef1dca35fe7c3e699fb0c3
--- /dev/null
+++ b/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_1_1000.R
@@ -0,0 +1,130 @@
+# ---------------------------- Helper functions ----------------------------
+# Strip leading and trailing whitespace from each element of x.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negated form of %in%.
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether the package named by mypkg appears among the installed packages.
+is.installed <- function(mypkg) {
+  pkg_names <- installed.packages()[, 1]
+  is.element(mypkg, pkg_names)
+}
+
+# Relative output paths used by the saveRDS() calls resolve against this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")
+
+# Packages are expected to be installed already. The disabled bootstrap that
+# used to sit here installed backports and devtools with install.packages()
+# and qgg with devtools::install_github("psoerensen/qgg") after setting
+# options(devtools.install.args=" --no-multiargs").
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+
+# ------------------------------- Input data -------------------------------
+message("Loading data...")
+MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
+geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
+MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
+W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")  # 7432
+go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")  # 7432
+go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")  # 7432
+markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")  # 7297 excluding those go terms with zero markers
+setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")  # 7297 excluding those go terms with zero markers
+rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")  # 7297 excluding those go terms with zero markers
+nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")  # 7297 excluding those go terms with zero markers
+nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")  # 7297 excluding those go terms with zero markers
+GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")  # 7297 excluding those go terms with zero markers
+rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")  # 7297 excluding those go terms with zero markers
+# load() restores the objects stored in G.Rdata; the script later uses G from it.
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+# ----------------------- Select the GO terms to test -----------------------
+# Keep only GO terms that have at least one marker, then take this job's slice.
+has_markers <- go_all_markers_number > 0
+n <- length(go_all_genes_number[has_markers])
+go_all_markers_filtered <- go_all_markers[has_markers]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+
+GF_filtered <- GF_filtered[1:1000]
+rGF_filtered <- rGF_filtered[1:1000]
+
+# ------------------------------ Run settings ------------------------------
+traits <- ncol(pheno_df)
+cycles <- 10
+# Fold assignments are read from the GBLUP run's output instead of being drawn
+# again, so no random seed or fold sampling is needed in this script.
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# One slot per cycle; a slot stays an empty list when the fit for it fails.
+H4_3_gblup_variances_all <- rep(list(list()), cycles)
+H4_3_gblup_prediction_all <- rep(list(list()), cycles)
+H4_3_gfblup_variances_all <- rep(list(list()), cycles)
+H4_3_gfblup_prediction_all <- rep(list(list()), cycles)
+H4_3_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Run greml() twice for one set of GRMs: once without and once with the
+# validation folds. Returns list(var = , pred = ). If either call fails, the
+# reason is reported with message() and both entries are empty lists, so a
+# failed model never keeps a half-finished result.
+fit_greml_pair <- function(y, X, GRM, validate, label) {
+  tryCatch({
+    list(
+      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
+      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
+    )
+  }, error = function(e) {
+    message(label, " failed: ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+}
+
+# The response and design matrix do not depend on the cycle, so build them once.
+y <- 1000000 * pheno_df$H4_3
+fm <- y ~ 1*y  # NOTE(review): presumably meant as an intercept-only model; confirm
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # NOTE(review): byrow=TRUE refills the flattened fold indices row-wise; whether
+  # that reproduces the GBLUP folds depends on how gblup_validate_all.rds was
+  # written; confirm against the script that saved it.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df)/n_folds), byrow=TRUE)
+  message ("H4_3...")
+
+  # GBLUP: single GRM G
+  gblup_fit <- fit_greml_pair(y, X, list(G=G), validate, "GBLUP")
+  H4_3_gblup_variances_all[[r]] <- gblup_fit$var
+  H4_3_gblup_prediction_all[[r]] <- gblup_fit$pred
+
+  # GFBLUP: one model per GO term, pairing GF_filtered[k] with rGF_filtered[k]
+  gf_variances <- rep(list(list()), length(GF_filtered))
+  gf_predictions <- rep(list(list()), length(GF_filtered))
+  names(gf_variances) <- names(GF_filtered)
+  names(gf_predictions) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate, paste("GFBLUP GO#", k))
+    gf_variances[[k]] <- gf_fit$var
+    gf_predictions[[k]] <- gf_fit$pred
+  }
+  H4_3_gfblup_variances_all[[r]] <- gf_variances
+  H4_3_gfblup_prediction_all[[r]] <- gf_predictions
+  H4_3_gfblup_validate_all[[r]] <- list(validate)
+}
+saveRDS(H4_3_gblup_variances_all,"H4_3_gblup_variances_all_1_1000.rds")
+saveRDS(H4_3_gblup_prediction_all,"H4_3_gblup_prediction_all_1_1000.rds")
+saveRDS(H4_3_gfblup_variances_all,"H4_3_gfblup_variances_all_1_1000.rds")
+saveRDS(H4_3_gfblup_prediction_all,"H4_3_gfblup_prediction_all_1_1000.rds")
+saveRDS(H4_3_gfblup_validate_all,"H4_3_gfblup_validate_all_1_1000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_2001_3000.R
new file mode 100644
index 0000000000000000000000000000000000000000..768b67199fff92cf79c18e472b9667ce6a361cd1
--- /dev/null
+++ b/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_2001_3000.R
@@ -0,0 +1,130 @@
+# ---------------------------- Helper functions ----------------------------
+# Strip leading and trailing whitespace from each element of x.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negated form of %in%.
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether the package named by mypkg appears among the installed packages.
+is.installed <- function(mypkg) {
+  pkg_names <- installed.packages()[, 1]
+  is.element(mypkg, pkg_names)
+}
+
+# Relative output paths used by the saveRDS() calls resolve against this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")
+
+# Packages are expected to be installed already. The disabled bootstrap that
+# used to sit here installed backports and devtools with install.packages()
+# and qgg with devtools::install_github("psoerensen/qgg") after setting
+# options(devtools.install.args=" --no-multiargs").
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library("qgg")
+library("dplyr")
+
+# ------------------------------- Input data -------------------------------
+message("Loading data...")
+MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
+geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
+MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
+W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")  # 7432
+go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")  # 7432
+go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")  # 7432
+markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")  # 7297 excluding those go terms with zero markers
+setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")  # 7297 excluding those go terms with zero markers
+rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")  # 7297 excluding those go terms with zero markers
+nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")  # 7297 excluding those go terms with zero markers
+nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")  # 7297 excluding those go terms with zero markers
+GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")  # 7297 excluding those go terms with zero markers
+rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")  # 7297 excluding those go terms with zero markers
+# load() restores the objects stored in G.Rdata; the script later uses G from it.
+load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+
+# ----------------------- Select the GO terms to test -----------------------
+# Keep only GO terms that have at least one marker, then take this job's slice.
+has_markers <- go_all_markers_number > 0
+n <- length(go_all_genes_number[has_markers])
+go_all_markers_filtered <- go_all_markers[has_markers]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+
+GF_filtered <- GF_filtered[2001:3000]
+rGF_filtered <- rGF_filtered[2001:3000]
+
+# ------------------------------ Run settings ------------------------------
+traits <- ncol(pheno_df)
+cycles <- 10
+# Fold assignments are read from the GBLUP run's output instead of being drawn
+# again, so no random seed or fold sampling is needed in this script.
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# One slot per cycle; a slot stays an empty list when the fit for it fails.
+H4_3_gblup_variances_all <- rep(list(list()), cycles)
+H4_3_gblup_prediction_all <- rep(list(list()), cycles)
+H4_3_gfblup_variances_all <- rep(list(list()), cycles)
+H4_3_gfblup_prediction_all <- rep(list(list()), cycles)
+H4_3_gfblup_validate_all <- rep(list(list()), cycles)
+
+# Run greml() twice for one set of GRMs: once without and once with the
+# validation folds. Returns list(var = , pred = ). If either call fails, the
+# reason is reported with message() and both entries are empty lists, so a
+# failed model never keeps a half-finished result.
+fit_greml_pair <- function(y, X, GRM, validate, label) {
+  tryCatch({
+    list(
+      var = greml(y = y, X = X, GRM = GRM, ncores = 1),
+      pred = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
+    )
+  }, error = function(e) {
+    message(label, " failed: ", conditionMessage(e))
+    list(var = list(), pred = list())
+  })
+}
+
+# The response and design matrix do not depend on the cycle, so build them once.
+y <- 1000000 * pheno_df$H4_3
+fm <- y ~ 1*y  # NOTE(review): presumably meant as an intercept-only model; confirm
+X <- model.matrix(fm)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#",r,"..."))
+  # NOTE(review): byrow=TRUE refills the flattened fold indices row-wise; whether
+  # that reproduces the GBLUP folds depends on how gblup_validate_all.rds was
+  # written; confirm against the script that saved it.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df)/n_folds), byrow=TRUE)
+  message ("H4_3...")
+
+  # GBLUP: single GRM G
+  gblup_fit <- fit_greml_pair(y, X, list(G=G), validate, "GBLUP")
+  H4_3_gblup_variances_all[[r]] <- gblup_fit$var
+  H4_3_gblup_prediction_all[[r]] <- gblup_fit$pred
+
+  # GFBLUP: one model per GO term, pairing GF_filtered[k] with rGF_filtered[k]
+  gf_variances <- rep(list(list()), length(GF_filtered))
+  gf_predictions <- rep(list(list()), length(GF_filtered))
+  names(gf_variances) <- names(GF_filtered)
+  names(gf_predictions) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#",k,"..."))
+    gf_fit <- fit_greml_pair(y, X, c(GF_filtered[k], rGF_filtered[k]), validate, paste("GFBLUP GO#", k))
+    gf_variances[[k]] <- gf_fit$var
+    gf_predictions[[k]] <- gf_fit$pred
+  }
+  H4_3_gfblup_variances_all[[r]] <- gf_variances
+  H4_3_gfblup_prediction_all[[r]] <- gf_predictions
+  H4_3_gfblup_validate_all[[r]] <- list(validate)
+}
+saveRDS(H4_3_gblup_variances_all,"H4_3_gblup_variances_all_2001_3000.rds")
+saveRDS(H4_3_gblup_prediction_all,"H4_3_gblup_prediction_all_2001_3000.rds")
+saveRDS(H4_3_gfblup_variances_all,"H4_3_gfblup_variances_all_2001_3000.rds")
+saveRDS(H4_3_gfblup_prediction_all,"H4_3_gfblup_prediction_all_2001_3000.rds")
+saveRDS(H4_3_gfblup_validate_all,"H4_3_gfblup_validate_all_2001_3000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_3001_4000.R
new file mode 100644
index 0000000000000000000000000000000000000000..6c1fb31e7da1a22c596abcf6c3a55186d76b7d47
--- /dev/null
+++ b/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_3001_4000.R
@@ -0,0 +1,130 @@
+# ---------------------------- Helper functions ----------------------------
+# Strip leading and trailing whitespace from each element of x.
+trim <- function(x) {
+  gsub("^\\s+|\\s+$", "", x)
+}
+# Negated form of %in%.
+`%not_in%` <- purrr::negate(`%in%`)
+# Report whether the package named by mypkg appears among the installed packages.
+is.installed <- function(mypkg) {
+  pkg_names <- installed.packages()[, 1]
+  is.element(mypkg, pkg_names)
+}
+
+# Relative output paths used by the saveRDS() calls resolve against this directory.
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")
+
+# Packages are expected to be installed already. The disabled bootstrap that
+# used to sit here installed backports and devtools with install.packages()
+# and qgg with devtools::install_github("psoerensen/qgg") after setting
+# options(devtools.install.args=" --no-multiargs").
+message("Loading required packages...")
+library(GO.db)
library(org.At.tair.db)
library("qgg")
library("dplyr")

# GBLUP vs GFBLUP comparison for trait H4_3, GO feature sets 3001-4000 ----
#
# For each of `cycles` repetitions this script fits
#   * a GBLUP model with the single relationship matrix G, and
#   * one GFBLUP model per GO term, using the pair (GF[k], rGF[k]) as GRMs,
# each once without and once with the validation sets taken from
# `gblup_validate[[r]]`. The five result lists are written with saveRDS()
# into the current working directory.

# Data loading ----
message("Loading data...")
priors_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors"

# Read "<priors_dir>/<subdir>/<name>.rds".
read_prior <- function(subdir, name) {
  readRDS(file.path(priors_dir, subdir, paste0(name, ".rds")))
}

# Only pheno_df, the go_all_* objects, GF, rGF and G are used further down in
# this script; the remaining objects are still loaded so that the resulting
# workspace is the same as before.
MAC_matrix_with_header <- read_prior("data", "MAC_matrix_with_header")
accessions <- read_prior("data", "accessions")
all_nucl_genes_bed <- read_prior("data", "all_nucl_genes_bed")
ath_all_new_maf_ldpruned_map <- read_prior("data", "ath_all_new_maf_ldpruned_map")
Pheno <- read_prior("data", "Pheno")
geno_pheno <- read_prior("data", "geno_pheno")
MAC_df <- read_prior("data", "MAC_df")
pheno_df <- read_prior("data", "pheno_df")
W <- read_prior("data", "W")
go_all_genes_number <- read_prior("go", "go_all_genes_number") # 7432
go_all_markers_number <- read_prior("go", "go_all_markers_number") # 7432
go_all_markers <- read_prior("go", "go_all_markers") # 7432
# The objects below hold 7297 entries (GO terms with zero markers excluded).
markerSets <- read_prior("go", "markerSets")
setsGF <- read_prior("go", "setsGF")
rsetsGF <- read_prior("go", "rsetsGF")
nsets <- read_prior("go", "nsets")
nsnps <- read_prior("go", "nsnps")
GF <- read_prior("go", "GF")
rGF <- read_prior("go", "rGF")
# NOTE(review): `G` used below is presumably restored by this load() call;
# confirm the contents of G.Rdata.
load(file = file.path(priors_dir, "data", "G.Rdata"))

# Feature-set selection ----
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# This script handles only the GO terms at positions 3001-4000.
GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]

# Cross-validation setup ----
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets are read from the GBLUP run instead of being re-sampled.
# The former time-seeded set.seed()/sample() code produced a `folds_index`
# that was never used and has been removed.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

empty_results <- function(len) rep(list(list()), len)

H4_3_gblup_variances_all <- empty_results(cycles)
H4_3_gblup_prediction_all <- empty_results(cycles)
H4_3_gfblup_variances_all <- empty_results(cycles)
H4_3_gfblup_prediction_all <- empty_results(cycles)
H4_3_gfblup_validate_all <- empty_results(cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- 1000000 * pheno_df$H4_3
# NOTE(review): `y ~ 1*y` puts the response on the right-hand side as well;
# this presumably reduces to an intercept-only design (`y ~ 1`) - confirm
# before simplifying.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  message("H4_3...")

  # GBLUP ----
  # On failure the entries for this cycle keep their preallocated empty list.
  # Assignments made inside an error handler are local to the handler
  # function, so the handler only reports the problem.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H4_3_gblup_variances_all[[r]] <- gblup_var
    H4_3_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # GFBLUP ----
  gf_var <- setNames(empty_results(length(GF_filtered)), names(GF_filtered))
  gf_pred <- setNames(empty_results(length(GF_filtered)), names(GF_filtered))
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm <- c(GF_filtered[k], rGF_filtered[k])
    # As before, a successful variance fit is kept even if the validation fit
    # fails afterwards; failed entries stay as the preallocated empty list.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm, validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed in cycle ", r, ", GO# ", k, ": ", conditionMessage(e))
    })
  }
  H4_3_gfblup_variances_all[[r]] <- gf_var
  H4_3_gfblup_prediction_all[[r]] <- gf_pred
  H4_3_gfblup_validate_all[[r]] <- list(validate)
}

# Output ----
saveRDS(H4_3_gblup_variances_all, "H4_3_gblup_variances_all_3001_4000.rds")
saveRDS(H4_3_gblup_prediction_all, "H4_3_gblup_prediction_all_3001_4000.rds")
saveRDS(H4_3_gfblup_variances_all, "H4_3_gfblup_variances_all_3001_4000.rds")
saveRDS(H4_3_gfblup_prediction_all, "H4_3_gfblup_prediction_all_3001_4000.rds")
saveRDS(H4_3_gfblup_validate_all, "H4_3_gfblup_validate_all_3001_4000.rds")

+################################################################################################################################## diff --git a/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..cbe7f8a15f2e419c29f029de251fbdefd9b425f6 --- /dev/null +++ b/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_4001_5000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") 
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +H4_3_gblup_variances_all=rep(list(list()),cycles) +H4_3_gblup_prediction_all=rep(list(list()),cycles) +H4_3_gfblup_variances_all=rep(list(list()),cycles) +H4_3_gfblup_prediction_all=rep(list(list()),cycles) +H4_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H4_3...") + y=1000000*pheno_df$H4_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H4_3_gblup_variances_all[[r]]<-var + H4_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
H4_3_gblup_variances_all[[r]]<-list() + H4_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + H4_3_gfblup_variances_all[[r]]<-var + H4_3_gfblup_prediction_all[[r]]<-pred + H4_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H4_3_gblup_variances_all,"H4_3_gblup_variances_all_4001_5000.rds") +saveRDS(H4_3_gblup_prediction_all,"H4_3_gblup_prediction_all_4001_5000.rds") +saveRDS(H4_3_gfblup_variances_all,"H4_3_gfblup_variances_all_4001_5000.rds") +saveRDS(H4_3_gfblup_prediction_all,"H4_3_gfblup_prediction_all_4001_5000.rds") +saveRDS(H4_3_gfblup_validate_all,"H4_3_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..5c82873cd543da3c0dfec9193017951a73f8427f --- /dev/null +++ b/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_5001_6000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim 
<- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds 
<- 8 + +H4_3_gblup_variances_all=rep(list(list()),cycles) +H4_3_gblup_prediction_all=rep(list(list()),cycles) +H4_3_gfblup_variances_all=rep(list(list()),cycles) +H4_3_gfblup_prediction_all=rep(list(list()),cycles) +H4_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("H4_3...") + y=1000000*pheno_df$H4_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + H4_3_gblup_variances_all[[r]]<-var + H4_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + H4_3_gblup_variances_all[[r]]<-list() + H4_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup 
error") + }) #try catch ends + } + H4_3_gfblup_variances_all[[r]]<-var + H4_3_gfblup_prediction_all[[r]]<-pred + H4_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(H4_3_gblup_variances_all,"H4_3_gblup_variances_all_5001_6000.rds") +saveRDS(H4_3_gblup_prediction_all,"H4_3_gblup_prediction_all_5001_6000.rds") +saveRDS(H4_3_gfblup_variances_all,"H4_3_gfblup_variances_all_5001_6000.rds") +saveRDS(H4_3_gfblup_prediction_all,"H4_3_gfblup_prediction_all_5001_6000.rds") +saveRDS(H4_3_gfblup_validate_all,"H4_3_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..941fe90a76a12fc2eed351ab450f0dee91475961 --- /dev/null +++ b/code/using_GO/psii/H4_3_gblup_vs_gfblup_using_go_model_6001_7297.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
library(org.At.tair.db)
library("qgg")
library("dplyr")

# GBLUP vs GFBLUP comparison for trait H4_3, GO feature sets 6001-n ----
#
# For each of `cycles` repetitions this script fits
#   * a GBLUP model with the single relationship matrix G, and
#   * one GFBLUP model per GO term, using the pair (GF[k], rGF[k]) as GRMs,
# each once without and once with the validation sets taken from
# `gblup_validate[[r]]`. The five result lists are written with saveRDS()
# into the current working directory.

# Data loading ----
message("Loading data...")
priors_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors"

# Read "<priors_dir>/<subdir>/<name>.rds".
read_prior <- function(subdir, name) {
  readRDS(file.path(priors_dir, subdir, paste0(name, ".rds")))
}

# Only pheno_df, the go_all_* objects, GF, rGF and G are used further down in
# this script; the remaining objects are still loaded so that the resulting
# workspace is the same as before.
MAC_matrix_with_header <- read_prior("data", "MAC_matrix_with_header")
accessions <- read_prior("data", "accessions")
all_nucl_genes_bed <- read_prior("data", "all_nucl_genes_bed")
ath_all_new_maf_ldpruned_map <- read_prior("data", "ath_all_new_maf_ldpruned_map")
Pheno <- read_prior("data", "Pheno")
geno_pheno <- read_prior("data", "geno_pheno")
MAC_df <- read_prior("data", "MAC_df")
pheno_df <- read_prior("data", "pheno_df")
W <- read_prior("data", "W")
go_all_genes_number <- read_prior("go", "go_all_genes_number") # 7432
go_all_markers_number <- read_prior("go", "go_all_markers_number") # 7432
go_all_markers <- read_prior("go", "go_all_markers") # 7432
# The objects below hold 7297 entries (GO terms with zero markers excluded).
markerSets <- read_prior("go", "markerSets")
setsGF <- read_prior("go", "setsGF")
rsetsGF <- read_prior("go", "rsetsGF")
nsets <- read_prior("go", "nsets")
nsnps <- read_prior("go", "nsnps")
GF <- read_prior("go", "GF")
rGF <- read_prior("go", "rGF")
# NOTE(review): `G` used below is presumably restored by this load() call;
# confirm the contents of G.Rdata.
load(file = file.path(priors_dir, "data", "G.Rdata"))

# Feature-set selection ----
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# This script handles the GO terms from position 6001 up to the last one
# that has markers (n).
GF_filtered <- GF_filtered[6001:n]
rGF_filtered <- rGF_filtered[6001:n]

# Cross-validation setup ----
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets are read from the GBLUP run instead of being re-sampled.
# The former time-seeded set.seed()/sample() code produced a `folds_index`
# that was never used and has been removed.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

empty_results <- function(len) rep(list(list()), len)

H4_3_gblup_variances_all <- empty_results(cycles)
H4_3_gblup_prediction_all <- empty_results(cycles)
H4_3_gfblup_variances_all <- empty_results(cycles)
H4_3_gfblup_prediction_all <- empty_results(cycles)
H4_3_gfblup_validate_all <- empty_results(cycles)

# Response and design matrix do not depend on the cycle, so build them once.
y <- 1000000 * pheno_df$H4_3
# NOTE(review): `y ~ 1*y` puts the response on the right-hand side as well;
# this presumably reduces to an intercept-only design (`y ~ 1`) - confirm
# before simplifying.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  message("H4_3...")

  # GBLUP ----
  # On failure the entries for this cycle keep their preallocated empty list.
  # Assignments made inside an error handler are local to the handler
  # function, so the handler only reports the problem.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    H4_3_gblup_variances_all[[r]] <- gblup_var
    H4_3_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # GFBLUP ----
  gf_var <- setNames(empty_results(length(GF_filtered)), names(GF_filtered))
  gf_pred <- setNames(empty_results(length(GF_filtered)), names(GF_filtered))
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm <- c(GF_filtered[k], rGF_filtered[k])
    # As before, a successful variance fit is kept even if the validation fit
    # fails afterwards; failed entries stay as the preallocated empty list.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm, validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed in cycle ", r, ", GO# ", k, ": ", conditionMessage(e))
    })
  }
  H4_3_gfblup_variances_all[[r]] <- gf_var
  H4_3_gfblup_prediction_all[[r]] <- gf_pred
  H4_3_gfblup_validate_all[[r]] <- list(validate)
}

# Output ----
saveRDS(H4_3_gblup_variances_all, "H4_3_gblup_variances_all_6001_7297.rds")
saveRDS(H4_3_gblup_prediction_all, "H4_3_gblup_prediction_all_6001_7297.rds")
saveRDS(H4_3_gfblup_variances_all, "H4_3_gfblup_variances_all_6001_7297.rds")
saveRDS(H4_3_gfblup_prediction_all, "H4_3_gfblup_prediction_all_6001_7297.rds")
saveRDS(H4_3_gfblup_validate_all, "H4_3_gfblup_validate_all_6001_7297.rds")

+################################################################################################################################## diff --git a/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..a47142685a6e916fdcfea44a723fe5c243c7f282 --- /dev/null +++ b/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_1001_2000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") 
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +L1_1_gblup_variances_all=rep(list(list()),cycles) +L1_1_gblup_prediction_all=rep(list(list()),cycles) +L1_1_gfblup_variances_all=rep(list(list()),cycles) +L1_1_gfblup_prediction_all=rep(list(list()),cycles) +L1_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L1_1...") + y=1000000*pheno_df$L1_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L1_1_gblup_variances_all[[r]]<-var + L1_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
L1_1_gblup_variances_all[[r]]<-list() + L1_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + L1_1_gfblup_variances_all[[r]]<-var + L1_1_gfblup_prediction_all[[r]]<-pred + L1_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L1_1_gblup_variances_all,"L1_1_gblup_variances_all_1001_2000.rds") +saveRDS(L1_1_gblup_prediction_all,"L1_1_gblup_prediction_all_1001_2000.rds") +saveRDS(L1_1_gfblup_variances_all,"L1_1_gfblup_variances_all_1001_2000.rds") +saveRDS(L1_1_gfblup_prediction_all,"L1_1_gfblup_prediction_all_1001_2000.rds") +saveRDS(L1_1_gfblup_validate_all,"L1_1_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..c5fa2fa943c679fa9606b6f8dffddaba87edfe07 --- /dev/null +++ b/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_1_1000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- 
function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
# ---- GO-term prior inputs (continued) ---------------------------------------
# Trailing numbers are the original author's size notes: 7432 entries for the
# first two objects, 7297 for the rest (GO terms with zero markers excluded).
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")  # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")  # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")  # 7297
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")  # 7297
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")  # 7297
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")  # 7297
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")  # 7297
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")  # 7297
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")  # 7297
# load() restores whatever objects were saved in the file; presumably this is
# where `G` (used by the GBLUP fits further down) comes from -- confirm.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")

# ---- Keep GO terms with at least one marker, then take this job's slice -----
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
kept_terms <- names(go_all_markers_filtered)

# This script handles elements 1..1000 of the filtered lists.
GF_filtered <- GF[kept_terms][1:1000]
rGF_filtered <- rGF[kept_terms][1:1000]

# ---- Run configuration --------------------------------------------------------
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets written by the GBLUP run; indexed per cycle later on.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8
+ +L1_1_gblup_variances_all=rep(list(list()),cycles) +L1_1_gblup_prediction_all=rep(list(list()),cycles) +L1_1_gfblup_variances_all=rep(list(list()),cycles) +L1_1_gfblup_prediction_all=rep(list(list()),cycles) +L1_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L1_1...") + y=1000000*pheno_df$L1_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L1_1_gblup_variances_all[[r]]<-var + L1_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + L1_1_gblup_variances_all[[r]]<-list() + L1_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + 
}) #try catch ends + } + L1_1_gfblup_variances_all[[r]]<-var + L1_1_gfblup_prediction_all[[r]]<-pred + L1_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L1_1_gblup_variances_all,"L1_1_gblup_variances_all_1_1000.rds") +saveRDS(L1_1_gblup_prediction_all,"L1_1_gblup_prediction_all_1_1000.rds") +saveRDS(L1_1_gfblup_variances_all,"L1_1_gfblup_variances_all_1_1000.rds") +saveRDS(L1_1_gfblup_prediction_all,"L1_1_gfblup_prediction_all_1_1000.rds") +saveRDS(L1_1_gfblup_validate_all,"L1_1_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..1982134c453b4ae77c8cc9d9530bc18e757f05f1 --- /dev/null +++ b/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_2001_3000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) 
# columns(org.At.tair.db)  # interactive helper, left disabled by the author
library(qgg)
library(dplyr)

# ---- Genotype / phenotype inputs ----------------------------------------------
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# ---- GO-term prior inputs -------------------------------------------------------
# Trailing numbers are the original author's size notes: 7432 entries for the
# first three objects, 7297 for the rest (GO terms with zero markers excluded).
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")  # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")  # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")  # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")  # 7297
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")  # 7297
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")  # 7297
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")  # 7297
# ---- Remaining GO-term prior inputs ---------------------------------------------
# Author's size note for these objects: 7297 entries (GO terms with zero markers
# excluded).
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
# load() restores whatever objects were saved in the file; presumably this is
# where `G` (used for the GBLUP fit below) comes from -- confirm.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")

# ---- Keep GO terms with at least one marker, then take this job's slice ---------
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# This script handles elements 2001..3000 of the filtered lists.
GF_filtered <- GF_filtered[2001:3000]
rGF_filtered <- rGF_filtered[2001:3000]

# ---- Run configuration ------------------------------------------------------------
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets written by the GBLUP run, one entry per cycle.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
L1_1_gblup_variances_all <- rep(list(list()), cycles)
L1_1_gblup_prediction_all <- rep(list(list()), cycles)
L1_1_gfblup_variances_all <- rep(list(list()), cycles)
L1_1_gfblup_prediction_all <- rep(list(list()), cycles)
L1_1_gfblup_validate_all <- rep(list(list()), cycles)

# Fit one model twice with greml(): once on all records and once with the
# cross-validation sets in `validate`.
#
# y, X, GRM, validate: forwarded unchanged to greml().
# label: text used to identify the model in the failure message.
#
# Returns list(variances = <fit without validate>, prediction = <fit with
# validate>). If either call raises an error, the error is reported with
# message() and BOTH elements are returned as empty lists.
#
# The fallback is returned as a value rather than assigned inside the handler:
# an assignment made inside `function(e)` only modifies a copy local to the
# handler and never reaches the caller's result lists.
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = GRM, ncores = 1),
      prediction = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("greml failed for ", label, ": ", conditionMessage(e))
      list(variances = list(), prediction = list())
    }
  )
}

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))

  # Folds are taken from the stored GBLUP validation sets, so no random fold
  # assignment (and no seeding) is needed here.
  # NOTE(review): if gblup_validate[[r]] holds a one-column-per-fold matrix,
  # unlist() flattens it column-major and byrow = TRUE then regroups the
  # indices into different folds than the GBLUP run used -- confirm against the
  # script that wrote gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message("L1_1...")
  y <- 1000000 * pheno_df$L1_1
  # NOTE(review): the right-hand side refers to the response itself;
  # presumably an intercept-only design (`y ~ 1`) was intended -- confirm what
  # model.matrix() returns for this formula before changing it.
  fm <- y ~ 1*y
  X <- model.matrix(fm)

  # ---- GBLUP: single genomic relationship matrix --------------------------------
  gblup_fit <- fit_greml_pair(
    y, X, list(G = G), validate,
    label = paste0("GBLUP, cycle ", r)
  )
  L1_1_gblup_variances_all[[r]] <- gblup_fit$variances
  L1_1_gblup_prediction_all[[r]] <- gblup_fit$prediction

  # ---- GFBLUP: one model per GO term ----------------------------------------------
  # Each model receives the k-th element of GF_filtered together with the k-th
  # element of rGF_filtered as its two relationship matrices.
  gfblup_variances <- rep(list(list()), length(GF_filtered))
  gfblup_prediction <- rep(list(list()), length(GF_filtered))
  names(gfblup_variances) <- names(GF_filtered)
  names(gfblup_prediction) <- names(GF_filtered)

  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    go_fit <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      label = paste0("GFBLUP, cycle ", r, ", GO# ", k)
    )
    gfblup_variances[[k]] <- go_fit$variances
    gfblup_prediction[[k]] <- go_fit$prediction
  }

  L1_1_gfblup_variances_all[[r]] <- gfblup_variances
  L1_1_gfblup_prediction_all[[r]] <- gfblup_prediction
  L1_1_gfblup_validate_all[[r]] <- list(validate)
}

# Written relative to the working directory set at the top of the script.
saveRDS(L1_1_gblup_variances_all, "L1_1_gblup_variances_all_2001_3000.rds")
saveRDS(L1_1_gblup_prediction_all, "L1_1_gblup_prediction_all_2001_3000.rds")
saveRDS(L1_1_gfblup_variances_all, "L1_1_gfblup_variances_all_2001_3000.rds")
saveRDS(L1_1_gfblup_prediction_all, "L1_1_gfblup_prediction_all_2001_3000.rds")
saveRDS(L1_1_gfblup_validate_all, "L1_1_gfblup_validate_all_2001_3000.rds")
+################################################################################################################################## diff --git a/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..533a735b39220830aa4df967cbf89a56dc641cf3 --- /dev/null +++ b/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_3001_4000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# ---- Genotype / phenotype inputs (continued) ------------------------------------
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# ---- GO-term prior inputs ---------------------------------------------------------
# Trailing numbers are the original author's size notes: 7432 entries for the
# first three objects, 7297 for the rest (GO terms with zero markers excluded).
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")  # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")  # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")  # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")  # 7297
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")  # 7297
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")  # 7297
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")  # 7297
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")  # 7297
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")  # 7297
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")  # 7297
# load() restores whatever objects were saved in the file; presumably this is
# where `G` (used by the GBLUP fits further down) comes from -- confirm.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +L1_1_gblup_variances_all=rep(list(list()),cycles) +L1_1_gblup_prediction_all=rep(list(list()),cycles) +L1_1_gfblup_variances_all=rep(list(list()),cycles) +L1_1_gfblup_prediction_all=rep(list(list()),cycles) +L1_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L1_1...") + y=1000000*pheno_df$L1_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L1_1_gblup_variances_all[[r]]<-var + L1_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
L1_1_gblup_variances_all[[r]]<-list() + L1_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + L1_1_gfblup_variances_all[[r]]<-var + L1_1_gfblup_prediction_all[[r]]<-pred + L1_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L1_1_gblup_variances_all,"L1_1_gblup_variances_all_3001_4000.rds") +saveRDS(L1_1_gblup_prediction_all,"L1_1_gblup_prediction_all_3001_4000.rds") +saveRDS(L1_1_gfblup_variances_all,"L1_1_gfblup_variances_all_3001_4000.rds") +saveRDS(L1_1_gfblup_prediction_all,"L1_1_gfblup_prediction_all_3001_4000.rds") +saveRDS(L1_1_gfblup_validate_all,"L1_1_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..f0199c8f890c00d6d7e68ac68e082201d28a339f --- /dev/null +++ b/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_4001_5000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim 
<- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds 
<- 8 + +L1_1_gblup_variances_all=rep(list(list()),cycles) +L1_1_gblup_prediction_all=rep(list(list()),cycles) +L1_1_gfblup_variances_all=rep(list(list()),cycles) +L1_1_gfblup_prediction_all=rep(list(list()),cycles) +L1_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L1_1...") + y=1000000*pheno_df$L1_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L1_1_gblup_variances_all[[r]]<-var + L1_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + L1_1_gblup_variances_all[[r]]<-list() + L1_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup 
error") + }) #try catch ends + } + L1_1_gfblup_variances_all[[r]]<-var + L1_1_gfblup_prediction_all[[r]]<-pred + L1_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L1_1_gblup_variances_all,"L1_1_gblup_variances_all_4001_5000.rds") +saveRDS(L1_1_gblup_prediction_all,"L1_1_gblup_prediction_all_4001_5000.rds") +saveRDS(L1_1_gfblup_variances_all,"L1_1_gfblup_variances_all_4001_5000.rds") +saveRDS(L1_1_gfblup_prediction_all,"L1_1_gfblup_prediction_all_4001_5000.rds") +saveRDS(L1_1_gfblup_validate_all,"L1_1_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..ae5e3bdfd0f41e19b7e49e27db4d6ad964ea8d2c --- /dev/null +++ b/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_5001_6000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
library(org.At.tair.db)
# columns(org.At.tair.db)  # interactive helper, left disabled by the author
library(qgg)
library(dplyr)

# ---- Genotype / phenotype inputs --------------------------------------------------
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# ---- GO-term prior inputs -----------------------------------------------------------
# Trailing numbers are the original author's size notes: 7432 entries for the
# first three objects, 7297 for the rest (GO terms with zero markers excluded).
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")  # 7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")  # 7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")  # 7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")  # 7297
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")  # 7297
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")  # 7297
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")  # 7297
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")  # 7297
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")  # 7297
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")  # 7297
# load() restores whatever objects were saved in the file; presumably this is
# where `G` (used for the GBLUP fit below) comes from -- confirm.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")

# ---- Keep GO terms with at least one marker, then take this job's slice -----------
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# This script handles elements 5001..6000 of the filtered lists.
GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]

# ---- Run configuration --------------------------------------------------------------
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets written by the GBLUP run, one entry per cycle.
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle; a slot stays an empty list when the fit for it fails.
L1_1_gblup_variances_all <- rep(list(list()), cycles)
L1_1_gblup_prediction_all <- rep(list(list()), cycles)
L1_1_gfblup_variances_all <- rep(list(list()), cycles)
L1_1_gfblup_prediction_all <- rep(list(list()), cycles)
L1_1_gfblup_validate_all <- rep(list(list()), cycles)

# Fit one model twice with greml(): once on all records and once with the
# cross-validation sets in `validate`.
#
# y, X, GRM, validate: forwarded unchanged to greml().
# label: text used to identify the model in the failure message.
#
# Returns list(variances = <fit without validate>, prediction = <fit with
# validate>). If either call raises an error, the error is reported with
# message() and BOTH elements are returned as empty lists.
#
# The fallback is returned as a value rather than assigned inside the handler:
# an assignment made inside `function(e)` only modifies a copy local to the
# handler and never reaches the caller's result lists.
fit_greml_pair <- function(y, X, GRM, validate, label) {
  tryCatch(
    list(
      variances = greml(y = y, X = X, GRM = GRM, ncores = 1),
      prediction = greml(y = y, X = X, GRM = GRM, validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("greml failed for ", label, ": ", conditionMessage(e))
      list(variances = list(), prediction = list())
    }
  )
}

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))

  # Folds are taken from the stored GBLUP validation sets, so no random fold
  # assignment (and no seeding) is needed here.
  # NOTE(review): if gblup_validate[[r]] holds a one-column-per-fold matrix,
  # unlist() flattens it column-major and byrow = TRUE then regroups the
  # indices into different folds than the GBLUP run used -- confirm against the
  # script that wrote gblup_validate_all.rds.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message("L1_1...")
  y <- 1000000 * pheno_df$L1_1
  # NOTE(review): the right-hand side refers to the response itself;
  # presumably an intercept-only design (`y ~ 1`) was intended -- confirm what
  # model.matrix() returns for this formula before changing it.
  fm <- y ~ 1*y
  X <- model.matrix(fm)

  # ---- GBLUP: single genomic relationship matrix ----------------------------------
  gblup_fit <- fit_greml_pair(
    y, X, list(G = G), validate,
    label = paste0("GBLUP, cycle ", r)
  )
  L1_1_gblup_variances_all[[r]] <- gblup_fit$variances
  L1_1_gblup_prediction_all[[r]] <- gblup_fit$prediction

  # ---- GFBLUP: one model per GO term ------------------------------------------------
  # Each model receives the k-th element of GF_filtered together with the k-th
  # element of rGF_filtered as its two relationship matrices.
  gfblup_variances <- rep(list(list()), length(GF_filtered))
  gfblup_prediction <- rep(list(list()), length(GF_filtered))
  names(gfblup_variances) <- names(GF_filtered)
  names(gfblup_prediction) <- names(GF_filtered)

  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    go_fit <- fit_greml_pair(
      y, X, c(GF_filtered[k], rGF_filtered[k]), validate,
      label = paste0("GFBLUP, cycle ", r, ", GO# ", k)
    )
    gfblup_variances[[k]] <- go_fit$variances
    gfblup_prediction[[k]] <- go_fit$prediction
  }

  L1_1_gfblup_variances_all[[r]] <- gfblup_variances
  L1_1_gfblup_prediction_all[[r]] <- gfblup_prediction
  L1_1_gfblup_validate_all[[r]] <- list(validate)
}

# Written relative to the working directory set at the top of the script.
saveRDS(L1_1_gblup_variances_all, "L1_1_gblup_variances_all_5001_6000.rds")
saveRDS(L1_1_gblup_prediction_all, "L1_1_gblup_prediction_all_5001_6000.rds")
saveRDS(L1_1_gfblup_variances_all, "L1_1_gfblup_variances_all_5001_6000.rds")
saveRDS(L1_1_gfblup_prediction_all, "L1_1_gfblup_prediction_all_5001_6000.rds")
saveRDS(L1_1_gfblup_validate_all, "L1_1_gfblup_validate_all_5001_6000.rds")
+################################################################################################################################## diff --git a/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..40829c32922cc8562a83480bc53bb174846a1cc3 --- /dev/null +++ b/code/using_GO/psii/L1_1_gblup_vs_gfblup_using_go_model_6001_7297.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") 
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[6001:n] +rGF_filtered<-rGF_filtered[6001:n] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +L1_1_gblup_variances_all=rep(list(list()),cycles) +L1_1_gblup_prediction_all=rep(list(list()),cycles) +L1_1_gfblup_variances_all=rep(list(list()),cycles) +L1_1_gfblup_prediction_all=rep(list(list()),cycles) +L1_1_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L1_1...") + y=1000000*pheno_df$L1_1 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L1_1_gblup_variances_all[[r]]<-var + L1_1_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
L1_1_gblup_variances_all[[r]]<-list() + L1_1_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + L1_1_gfblup_variances_all[[r]]<-var + L1_1_gfblup_prediction_all[[r]]<-pred + L1_1_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L1_1_gblup_variances_all,"L1_1_gblup_variances_all_6001_7297.rds") +saveRDS(L1_1_gblup_prediction_all,"L1_1_gblup_prediction_all_6001_7297.rds") +saveRDS(L1_1_gfblup_variances_all,"L1_1_gfblup_variances_all_6001_7297.rds") +saveRDS(L1_1_gfblup_prediction_all,"L1_1_gfblup_prediction_all_6001_7297.rds") +saveRDS(L1_1_gfblup_validate_all,"L1_1_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..1fa3ae8ef57fbd36c1773576107fcdc820ffeb00 --- /dev/null +++ b/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_1001_2000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim 
<- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds 
<- 8 + +L1_2_gblup_variances_all=rep(list(list()),cycles) +L1_2_gblup_prediction_all=rep(list(list()),cycles) +L1_2_gfblup_variances_all=rep(list(list()),cycles) +L1_2_gfblup_prediction_all=rep(list(list()),cycles) +L1_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L1_2...") + y=1000000*pheno_df$L1_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L1_2_gblup_variances_all[[r]]<-var + L1_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + L1_2_gblup_variances_all[[r]]<-list() + L1_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup 
error") + }) #try catch ends + } + L1_2_gfblup_variances_all[[r]]<-var + L1_2_gfblup_prediction_all[[r]]<-pred + L1_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L1_2_gblup_variances_all,"L1_2_gblup_variances_all_1001_2000.rds") +saveRDS(L1_2_gblup_prediction_all,"L1_2_gblup_prediction_all_1001_2000.rds") +saveRDS(L1_2_gfblup_variances_all,"L1_2_gfblup_variances_all_1001_2000.rds") +saveRDS(L1_2_gfblup_prediction_all,"L1_2_gfblup_prediction_all_1001_2000.rds") +saveRDS(L1_2_gfblup_validate_all,"L1_2_gfblup_validate_all_1001_2000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..75c8288b83103fdd93fed0fd195d3dae503f9108 --- /dev/null +++ b/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_1_1000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
+library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms 
with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +L1_2_gblup_variances_all=rep(list(list()),cycles) +L1_2_gblup_prediction_all=rep(list(list()),cycles) +L1_2_gfblup_variances_all=rep(list(list()),cycles) +L1_2_gfblup_prediction_all=rep(list(list()),cycles) +L1_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L1_2...") + y=1000000*pheno_df$L1_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L1_2_gblup_variances_all[[r]]<-var + L1_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + L1_2_gblup_variances_all[[r]]<-list() + L1_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + L1_2_gfblup_variances_all[[r]]<-var + L1_2_gfblup_prediction_all[[r]]<-pred + L1_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L1_2_gblup_variances_all,"L1_2_gblup_variances_all_1_1000.rds") +saveRDS(L1_2_gblup_prediction_all,"L1_2_gblup_prediction_all_1_1000.rds") +saveRDS(L1_2_gfblup_variances_all,"L1_2_gfblup_variances_all_1_1000.rds") +saveRDS(L1_2_gfblup_prediction_all,"L1_2_gfblup_prediction_all_1_1000.rds") +saveRDS(L1_2_gfblup_validate_all,"L1_2_gfblup_validate_all_1_1000.rds") + 
+################################################################################################################################## diff --git a/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..3e0b466fcc03c01084cf58e4500c353f1729e871 --- /dev/null +++ b/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_2001_3000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") 
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +L1_2_gblup_variances_all=rep(list(list()),cycles) +L1_2_gblup_prediction_all=rep(list(list()),cycles) +L1_2_gfblup_variances_all=rep(list(list()),cycles) +L1_2_gfblup_prediction_all=rep(list(list()),cycles) +L1_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L1_2...") + y=1000000*pheno_df$L1_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L1_2_gblup_variances_all[[r]]<-var + L1_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
L1_2_gblup_variances_all[[r]]<-list() + L1_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + L1_2_gfblup_variances_all[[r]]<-var + L1_2_gfblup_prediction_all[[r]]<-pred + L1_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L1_2_gblup_variances_all,"L1_2_gblup_variances_all_2001_3000.rds") +saveRDS(L1_2_gblup_prediction_all,"L1_2_gblup_prediction_all_2001_3000.rds") +saveRDS(L1_2_gfblup_variances_all,"L1_2_gfblup_variances_all_2001_3000.rds") +saveRDS(L1_2_gfblup_prediction_all,"L1_2_gfblup_prediction_all_2001_3000.rds") +saveRDS(L1_2_gfblup_validate_all,"L1_2_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..d56ce82165b8f4ac6eec4b8925c453681267127f --- /dev/null +++ b/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_3001_4000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim 
<- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds 
<- 8 + +L1_2_gblup_variances_all=rep(list(list()),cycles) +L1_2_gblup_prediction_all=rep(list(list()),cycles) +L1_2_gfblup_variances_all=rep(list(list()),cycles) +L1_2_gfblup_prediction_all=rep(list(list()),cycles) +L1_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L1_2...") + y=1000000*pheno_df$L1_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L1_2_gblup_variances_all[[r]]<-var + L1_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + L1_2_gblup_variances_all[[r]]<-list() + L1_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup 
error") + }) #try catch ends + } + L1_2_gfblup_variances_all[[r]]<-var + L1_2_gfblup_prediction_all[[r]]<-pred + L1_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L1_2_gblup_variances_all,"L1_2_gblup_variances_all_3001_4000.rds") +saveRDS(L1_2_gblup_prediction_all,"L1_2_gblup_prediction_all_3001_4000.rds") +saveRDS(L1_2_gfblup_variances_all,"L1_2_gfblup_variances_all_3001_4000.rds") +saveRDS(L1_2_gfblup_prediction_all,"L1_2_gfblup_prediction_all_3001_4000.rds") +saveRDS(L1_2_gfblup_validate_all,"L1_2_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..278a0154dde24e01c9da60e771afd0822c3404e6 --- /dev/null +++ b/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_4001_5000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
+library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms 
with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +L1_2_gblup_variances_all=rep(list(list()),cycles) +L1_2_gblup_prediction_all=rep(list(list()),cycles) +L1_2_gfblup_variances_all=rep(list(list()),cycles) +L1_2_gfblup_prediction_all=rep(list(list()),cycles) +L1_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L1_2...") + y=1000000*pheno_df$L1_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L1_2_gblup_variances_all[[r]]<-var + L1_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + L1_2_gblup_variances_all[[r]]<-list() + L1_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + L1_2_gfblup_variances_all[[r]]<-var + L1_2_gfblup_prediction_all[[r]]<-pred + L1_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L1_2_gblup_variances_all,"L1_2_gblup_variances_all_4001_5000.rds") +saveRDS(L1_2_gblup_prediction_all,"L1_2_gblup_prediction_all_4001_5000.rds") +saveRDS(L1_2_gfblup_variances_all,"L1_2_gfblup_variances_all_4001_5000.rds") +saveRDS(L1_2_gfblup_prediction_all,"L1_2_gfblup_prediction_all_4001_5000.rds") +saveRDS(L1_2_gfblup_validate_all,"L1_2_gfblup_validate_all_4001_5000.rds") + 
+################################################################################################################################## diff --git a/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..b25d2f0baafbf0c13ced04f3f00f820c88cc957f --- /dev/null +++ b/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_5001_6000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") 
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +L1_2_gblup_variances_all=rep(list(list()),cycles) +L1_2_gblup_prediction_all=rep(list(list()),cycles) +L1_2_gfblup_variances_all=rep(list(list()),cycles) +L1_2_gfblup_prediction_all=rep(list(list()),cycles) +L1_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L1_2...") + y=1000000*pheno_df$L1_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L1_2_gblup_variances_all[[r]]<-var + L1_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
L1_2_gblup_variances_all[[r]]<-list() + L1_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + L1_2_gfblup_variances_all[[r]]<-var + L1_2_gfblup_prediction_all[[r]]<-pred + L1_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L1_2_gblup_variances_all,"L1_2_gblup_variances_all_5001_6000.rds") +saveRDS(L1_2_gblup_prediction_all,"L1_2_gblup_prediction_all_5001_6000.rds") +saveRDS(L1_2_gfblup_variances_all,"L1_2_gfblup_variances_all_5001_6000.rds") +saveRDS(L1_2_gfblup_prediction_all,"L1_2_gfblup_prediction_all_5001_6000.rds") +saveRDS(L1_2_gfblup_validate_all,"L1_2_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..09291608e52a59f5287a927eab4830664bfde147 --- /dev/null +++ b/code/using_GO/psii/L1_2_gblup_vs_gfblup_using_go_model_6001_7297.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim 
<- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[6001:n] +rGF_filtered<-rGF_filtered[6001:n] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 
+ +L1_2_gblup_variances_all=rep(list(list()),cycles) +L1_2_gblup_prediction_all=rep(list(list()),cycles) +L1_2_gfblup_variances_all=rep(list(list()),cycles) +L1_2_gfblup_prediction_all=rep(list(list()),cycles) +L1_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L1_2...") + y=1000000*pheno_df$L1_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L1_2_gblup_variances_all[[r]]<-var + L1_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + L1_2_gblup_variances_all[[r]]<-list() + L1_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + 
}) #try catch ends + } + L1_2_gfblup_variances_all[[r]]<-var + L1_2_gfblup_prediction_all[[r]]<-pred + L1_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L1_2_gblup_variances_all,"L1_2_gblup_variances_all_6001_7297.rds") +saveRDS(L1_2_gblup_prediction_all,"L1_2_gblup_prediction_all_6001_7297.rds") +saveRDS(L1_2_gfblup_variances_all,"L1_2_gfblup_variances_all_6001_7297.rds") +saveRDS(L1_2_gfblup_prediction_all,"L1_2_gfblup_prediction_all_6001_7297.rds") +saveRDS(L1_2_gfblup_validate_all,"L1_2_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..01c1191822d4ddad62a9e6027f02753fbced9b9b --- /dev/null +++ b/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_1001_2000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
+library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms 
with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[1001:2000] +rGF_filtered<-rGF_filtered[1001:2000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +L1_3_gblup_variances_all=rep(list(list()),cycles) +L1_3_gblup_prediction_all=rep(list(list()),cycles) +L1_3_gfblup_variances_all=rep(list(list()),cycles) +L1_3_gfblup_prediction_all=rep(list(list()),cycles) +L1_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L1_3...") + y=1000000*pheno_df$L1_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L1_3_gblup_variances_all[[r]]<-var + L1_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + L1_3_gblup_variances_all[[r]]<-list() + L1_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + L1_3_gfblup_variances_all[[r]]<-var + L1_3_gfblup_prediction_all[[r]]<-pred + L1_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L1_3_gblup_variances_all,"L1_3_gblup_variances_all_1001_2000.rds") +saveRDS(L1_3_gblup_prediction_all,"L1_3_gblup_prediction_all_1001_2000.rds") +saveRDS(L1_3_gfblup_variances_all,"L1_3_gfblup_variances_all_1001_2000.rds") +saveRDS(L1_3_gfblup_prediction_all,"L1_3_gfblup_prediction_all_1001_2000.rds") +saveRDS(L1_3_gfblup_validate_all,"L1_3_gfblup_validate_all_1001_2000.rds") + 
+################################################################################################################################## diff --git a/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..6f0abf9a67b4bd30a3a623d4ae0ee40d90ebfa7e --- /dev/null +++ b/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_1_1000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# ---- Input objects: marker map, phenotypes and genotype matrices ----
# Every object is restored under the same name as the file it comes from.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# ---- GO-term annotations (original note: 7432 entries each) ----
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# ---- Per-GO-term marker sets and relationship matrices ----
# Original note: 7297 entries each, i.e. GO terms with zero markers excluded.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# load() restores whatever objects were saved in the .Rdata file under their
# stored names. NOTE(review): presumably this is where `G` comes from - confirm.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +L1_3_gblup_variances_all=rep(list(list()),cycles) +L1_3_gblup_prediction_all=rep(list(list()),cycles) +L1_3_gfblup_variances_all=rep(list(list()),cycles) +L1_3_gfblup_prediction_all=rep(list(list()),cycles) +L1_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L1_3...") + y=1000000*pheno_df$L1_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L1_3_gblup_variances_all[[r]]<-var + L1_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
L1_3_gblup_variances_all[[r]]<-list() + L1_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + L1_3_gfblup_variances_all[[r]]<-var + L1_3_gfblup_prediction_all[[r]]<-pred + L1_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L1_3_gblup_variances_all,"L1_3_gblup_variances_all_1_1000.rds") +saveRDS(L1_3_gblup_prediction_all,"L1_3_gblup_prediction_all_1_1000.rds") +saveRDS(L1_3_gfblup_variances_all,"L1_3_gfblup_variances_all_1_1000.rds") +saveRDS(L1_3_gfblup_prediction_all,"L1_3_gfblup_prediction_all_1_1000.rds") +saveRDS(L1_3_gfblup_validate_all,"L1_3_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..c46504578eb7dd7f4c65dd7a56288761661cef3e --- /dev/null +++ b/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_2001_3000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function 
(x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds 
<- 8 + +L1_3_gblup_variances_all=rep(list(list()),cycles) +L1_3_gblup_prediction_all=rep(list(list()),cycles) +L1_3_gfblup_variances_all=rep(list(list()),cycles) +L1_3_gfblup_prediction_all=rep(list(list()),cycles) +L1_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L1_3...") + y=1000000*pheno_df$L1_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L1_3_gblup_variances_all[[r]]<-var + L1_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + L1_3_gblup_variances_all[[r]]<-list() + L1_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup 
error") + }) #try catch ends + } + L1_3_gfblup_variances_all[[r]]<-var + L1_3_gfblup_prediction_all[[r]]<-pred + L1_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L1_3_gblup_variances_all,"L1_3_gblup_variances_all_2001_3000.rds") +saveRDS(L1_3_gblup_prediction_all,"L1_3_gblup_prediction_all_2001_3000.rds") +saveRDS(L1_3_gfblup_variances_all,"L1_3_gfblup_variances_all_2001_3000.rds") +saveRDS(L1_3_gfblup_prediction_all,"L1_3_gfblup_prediction_all_2001_3000.rds") +saveRDS(L1_3_gfblup_validate_all,"L1_3_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..084d8fc84a58bc9689016359d5bb3eab77da37dd --- /dev/null +++ b/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_3001_4000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")

# ---- Load inputs -----------------------------------------------------------
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

# Of the objects restored below, only pheno_df, GF, rGF and the three go_all_*
# objects are referenced by name later in this section; the others are still
# loaded so the workspace stays the same as before.
MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))

# Original note: 7432 entries each.
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))

# Original note: 7297 entries each (GO terms with zero markers excluded).
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))

# NOTE(review): `G` used by the GBLUP fit is presumably restored here; load()
# does not show which names it creates - confirm.
load(file = file.path(data_dir, "G.Rdata"))

# ---- Select the GO terms handled by this script ------------------------------
# Keep GO terms that have at least one marker, then take this script's chunk.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

go_chunk <- 3001:4000
GF_filtered <- GF_filtered[go_chunk]
rGF_filtered <- rGF_filtered[go_chunk]

# ---- Cross-validation setup ------------------------------------------------
traits <- ncol(pheno_df)
cycles <- 10
# Validation sets are read from a stored GBLUP run rather than sampled here.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One slot per cycle, pre-filled with an empty list so that a failed fit simply
# leaves its slot empty.
L1_3_gblup_variances_all <- rep(list(list()), cycles)
L1_3_gblup_prediction_all <- rep(list(list()), cycles)
L1_3_gfblup_variances_all <- rep(list(list()), cycles)
L1_3_gfblup_prediction_all <- rep(list(list()), cycles)
L1_3_gfblup_validate_all <- rep(list(list()), cycles)

# Response and fixed-effect design do not depend on the cycle: build them once.
y <- 1000000 * pheno_df$L1_3
# NOTE(review): `1*y` puts the response on the right-hand side of the formula.
# Presumably an intercept-only design (`y ~ 1`) was intended - check what
# model.matrix() returns here before changing the formula.
fm <- y ~ 1*y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))

  # Reshape the stored validation indices of cycle r into a matrix with
  # nrow(pheno_df) / n_folds rows (presumably one column per fold).
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message("L1_3...")

  # ---- GBLUP: single genomic relationship matrix ----
  # Results are stored only when both fits succeed. The handler just logs:
  # assigning inside it would only change a copy local to the handler.
  tryCatch({
    gblup_var <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
    gblup_pred <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    L1_3_gblup_variances_all[[r]] <- gblup_var
    L1_3_gblup_prediction_all[[r]] <- gblup_pred
  }, error = function(e) {
    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
  })

  # ---- GFBLUP: one fit per GO term (feature GRM + remainder GRM) ----
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)

  # seq_along() skips the loop entirely when the chunk is empty.
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # If the variance fit fails the prediction fit is not attempted; if only
    # the prediction fit fails the variance fit is still kept.
    tryCatch({
      gf_var[[k]] <- greml(y = y, X = X, GRM = grm_pair, ncores = 1)
      gf_pred[[k]] <- greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
    }, error = function(e) {
      message("GFBLUP failed for GO#", k, " in cycle ", r, ": ", conditionMessage(e))
    })
  }

  L1_3_gfblup_variances_all[[r]] <- gf_var
  L1_3_gfblup_prediction_all[[r]] <- gf_pred
  L1_3_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Persist results (written to the current working directory) -------------
saveRDS(L1_3_gblup_variances_all, "L1_3_gblup_variances_all_3001_4000.rds")
saveRDS(L1_3_gblup_prediction_all, "L1_3_gblup_prediction_all_3001_4000.rds")
saveRDS(L1_3_gfblup_variances_all, "L1_3_gfblup_variances_all_3001_4000.rds")
saveRDS(L1_3_gfblup_prediction_all, "L1_3_gfblup_prediction_all_3001_4000.rds")
saveRDS(L1_3_gfblup_validate_all, "L1_3_gfblup_validate_all_3001_4000.rds")

+################################################################################################################################## diff --git a/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..c0e0edb09cbae45dcf0127c9e8df857cba3c7d60 --- /dev/null +++ b/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_4001_5000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# ---- Input objects: marker map, phenotypes and genotype matrices ----
# Every object is restored under the same name as the file it comes from.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# ---- GO-term annotations (original note: 7432 entries each) ----
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# ---- Per-GO-term marker sets and relationship matrices ----
# Original note: 7297 entries each, i.e. GO terms with zero markers excluded.
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# load() restores whatever objects were saved in the .Rdata file under their
# stored names. NOTE(review): presumably this is where `G` comes from - confirm.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[4001:5000] +rGF_filtered<-rGF_filtered[4001:5000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +L1_3_gblup_variances_all=rep(list(list()),cycles) +L1_3_gblup_prediction_all=rep(list(list()),cycles) +L1_3_gfblup_variances_all=rep(list(list()),cycles) +L1_3_gfblup_prediction_all=rep(list(list()),cycles) +L1_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L1_3...") + y=1000000*pheno_df$L1_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L1_3_gblup_variances_all[[r]]<-var + L1_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
L1_3_gblup_variances_all[[r]]<-list() + L1_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + L1_3_gfblup_variances_all[[r]]<-var + L1_3_gfblup_prediction_all[[r]]<-pred + L1_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L1_3_gblup_variances_all,"L1_3_gblup_variances_all_4001_5000.rds") +saveRDS(L1_3_gblup_prediction_all,"L1_3_gblup_prediction_all_4001_5000.rds") +saveRDS(L1_3_gfblup_variances_all,"L1_3_gfblup_variances_all_4001_5000.rds") +saveRDS(L1_3_gfblup_prediction_all,"L1_3_gfblup_prediction_all_4001_5000.rds") +saveRDS(L1_3_gfblup_validate_all,"L1_3_gfblup_validate_all_4001_5000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..6fdf2635565be9367cd41b54074ce4a7a22b293a --- /dev/null +++ b/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_5001_6000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim 
<- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds 
<- 8 + +L1_3_gblup_variances_all=rep(list(list()),cycles) +L1_3_gblup_prediction_all=rep(list(list()),cycles) +L1_3_gfblup_variances_all=rep(list(list()),cycles) +L1_3_gfblup_prediction_all=rep(list(list()),cycles) +L1_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L1_3...") + y=1000000*pheno_df$L1_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L1_3_gblup_variances_all[[r]]<-var + L1_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + L1_3_gblup_variances_all[[r]]<-list() + L1_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup 
error") + }) #try catch ends + } + L1_3_gfblup_variances_all[[r]]<-var + L1_3_gfblup_prediction_all[[r]]<-pred + L1_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L1_3_gblup_variances_all,"L1_3_gblup_variances_all_5001_6000.rds") +saveRDS(L1_3_gblup_prediction_all,"L1_3_gblup_prediction_all_5001_6000.rds") +saveRDS(L1_3_gfblup_variances_all,"L1_3_gfblup_variances_all_5001_6000.rds") +saveRDS(L1_3_gfblup_prediction_all,"L1_3_gfblup_prediction_all_5001_6000.rds") +saveRDS(L1_3_gfblup_validate_all,"L1_3_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..a88ffbbb5c9c33c8cba9ae1bad606ca8b4101f5b --- /dev/null +++ b/code/using_GO/psii/L1_3_gblup_vs_gfblup_using_go_model_6001_7297.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
+geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
+go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
+markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
+setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
+rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
+nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+# NOTE(review): G is never assigned in this script; presumably this .Rdata file provides it -- confirm.
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+########################################################################################
+# Keep only the GO terms that have at least one marker, then take this job's slice (6001..n).
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+
+GF_filtered <- GF_filtered[6001:n]
+rGF_filtered <- rGF_filtered[6001:n]
+###########################################################################################
+traits <- ncol(pheno_df)
+cycles <- 10
+# Validation sets saved by the GBLUP run; element r is used for cycle r below.
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# One slot per cycle. Every slot starts as an empty list and stays that way when a fit fails.
+L1_3_gblup_variances_all <- rep(list(list()), cycles)
+L1_3_gblup_prediction_all <- rep(list(list()), cycles)
+L1_3_gfblup_variances_all <- rep(list(list()), cycles)
+L1_3_gfblup_prediction_all <- rep(list(list()), cycles)
+L1_3_gfblup_validate_all <- rep(list(list()), cycles)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Folds are taken from the stored GBLUP validation sets; no random folds are drawn here.
+  # NOTE(review): if the stored element is a matrix, unlist() flattens it column by column while
+  # byrow = TRUE refills it row by row, so the fold columns may differ from the stored ones.
+  # round() also assumes nrow(pheno_df) is a multiple of n_folds -- confirm against the GBLUP script.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message("L1_3...")
+  y <- 1000000 * pheno_df$L1_3
+  # NOTE(review): `1*y` puts the response on the right-hand side; presumably an intercept-only
+  # design (y ~ 1) is intended -- confirm before changing.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Fit with the full GRM, without and with the validation sets. Results are stored only when
+  # both fits succeed. On failure the slots keep their empty lists and the error is reported
+  # (assignments made inside an error handler would only touch handler-local copies).
+  tryCatch({
+    gblup_fit <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
+    gblup_cv <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    L1_3_gblup_variances_all[[r]] <- gblup_fit
+    L1_3_gblup_prediction_all[[r]] <- gblup_cv
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  })
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One pair of fits per GO term, each using that term's entry of GF_filtered and rGF_filtered.
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    tryCatch({
+      gfblup_var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
+      gfblup_pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
+    }, error = function(e) {
+      # Entries not yet assigned for this term keep their empty-list placeholder.
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  L1_3_gfblup_variances_all[[r]] <- gfblup_var
+  L1_3_gfblup_prediction_all[[r]] <- gfblup_pred
+  L1_3_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(L1_3_gblup_variances_all,"L1_3_gblup_variances_all_6001_7297.rds")
+saveRDS(L1_3_gblup_prediction_all,"L1_3_gblup_prediction_all_6001_7297.rds")
+saveRDS(L1_3_gfblup_variances_all,"L1_3_gfblup_variances_all_6001_7297.rds")
+saveRDS(L1_3_gfblup_prediction_all,"L1_3_gfblup_prediction_all_6001_7297.rds")
+saveRDS(L1_3_gfblup_validate_all,"L1_3_gfblup_validate_all_6001_7297.rds")
+
+################################################################################################################################## diff --git a/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..e0dfdcf9c8b1fc6f9dd7c9f7d00070623e111b10 --- /dev/null +++ b/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_1001_2000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") 
+########################################################################################
+# Keep only the GO terms that have at least one marker, then take this job's slice (1001..2000).
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+
+GF_filtered <- GF_filtered[1001:2000]
+rGF_filtered <- rGF_filtered[1001:2000]
+###########################################################################################
+traits <- ncol(pheno_df)
+cycles <- 10
+# Validation sets saved by the GBLUP run; element r is used for cycle r below.
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# One slot per cycle. Every slot starts as an empty list and stays that way when a fit fails.
+L2_1_gblup_variances_all <- rep(list(list()), cycles)
+L2_1_gblup_prediction_all <- rep(list(list()), cycles)
+L2_1_gfblup_variances_all <- rep(list(list()), cycles)
+L2_1_gfblup_prediction_all <- rep(list(list()), cycles)
+L2_1_gfblup_validate_all <- rep(list(list()), cycles)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Folds are taken from the stored GBLUP validation sets; no random folds are drawn here.
+  # NOTE(review): if the stored element is a matrix, unlist() flattens it column by column while
+  # byrow = TRUE refills it row by row, so the fold columns may differ from the stored ones.
+  # round() also assumes nrow(pheno_df) is a multiple of n_folds -- confirm against the GBLUP script.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message("L2_1...")
+  y <- 1000000 * pheno_df$L2_1
+  # NOTE(review): `1*y` puts the response on the right-hand side; presumably an intercept-only
+  # design (y ~ 1) is intended -- confirm before changing.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Fit with the full GRM, without and with the validation sets. Results are stored only when
+  # both fits succeed. On failure the slots keep their empty lists and the error is reported
+  # (assignments made inside an error handler would only touch handler-local copies).
+  tryCatch({
+    gblup_fit <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
+    gblup_cv <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    L2_1_gblup_variances_all[[r]] <- gblup_fit
+    L2_1_gblup_prediction_all[[r]] <- gblup_cv
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  })
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One pair of fits per GO term, each using that term's entry of GF_filtered and rGF_filtered.
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    tryCatch({
+      gfblup_var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
+      gfblup_pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
+    }, error = function(e) {
+      # Entries not yet assigned for this term keep their empty-list placeholder.
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  L2_1_gfblup_variances_all[[r]] <- gfblup_var
+  L2_1_gfblup_prediction_all[[r]] <- gfblup_pred
+  L2_1_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(L2_1_gblup_variances_all,"L2_1_gblup_variances_all_1001_2000.rds")
+saveRDS(L2_1_gblup_prediction_all,"L2_1_gblup_prediction_all_1001_2000.rds")
+saveRDS(L2_1_gfblup_variances_all,"L2_1_gfblup_variances_all_1001_2000.rds")
+saveRDS(L2_1_gfblup_prediction_all,"L2_1_gfblup_prediction_all_1001_2000.rds")
+saveRDS(L2_1_gfblup_validate_all,"L2_1_gfblup_validate_all_1001_2000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_1_1000.R
new file mode 100644
index 0000000000000000000000000000000000000000..7844939170a7df68b01565ec5acd3c27ab9b4000
--- /dev/null
+++ b/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_1_1000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+ is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db)
+library('qgg')
+library('dplyr')
+###########################################################################################
+message("Loading data...")
+MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
+accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
+all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
+ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
+Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
+geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
+MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
+pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
+W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
+go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 
+
+# One slot per cycle. Every slot starts as an empty list and stays that way when a fit fails.
+L2_1_gblup_variances_all <- rep(list(list()), cycles)
+L2_1_gblup_prediction_all <- rep(list(list()), cycles)
+L2_1_gfblup_variances_all <- rep(list(list()), cycles)
+L2_1_gfblup_prediction_all <- rep(list(list()), cycles)
+L2_1_gfblup_validate_all <- rep(list(list()), cycles)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Folds are taken from the stored GBLUP validation sets; no random folds are drawn here.
+  # NOTE(review): if the stored element is a matrix, unlist() flattens it column by column while
+  # byrow = TRUE refills it row by row, so the fold columns may differ from the stored ones.
+  # round() also assumes nrow(pheno_df) is a multiple of n_folds -- confirm against the GBLUP script.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message("L2_1...")
+  y <- 1000000 * pheno_df$L2_1
+  # NOTE(review): `1*y` puts the response on the right-hand side; presumably an intercept-only
+  # design (y ~ 1) is intended -- confirm before changing.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Fit with the full GRM, without and with the validation sets. Results are stored only when
+  # both fits succeed. On failure the slots keep their empty lists and the error is reported
+  # (assignments made inside an error handler would only touch handler-local copies).
+  tryCatch({
+    gblup_fit <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
+    gblup_cv <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    L2_1_gblup_variances_all[[r]] <- gblup_fit
+    L2_1_gblup_prediction_all[[r]] <- gblup_cv
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  })
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One pair of fits per GO term, each using that term's entry of GF_filtered and rGF_filtered.
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    tryCatch({
+      gfblup_var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
+      gfblup_pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
+    }, error = function(e) {
+      # Entries not yet assigned for this term keep their empty-list placeholder.
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  L2_1_gfblup_variances_all[[r]] <- gfblup_var
+  L2_1_gfblup_prediction_all[[r]] <- gfblup_pred
+  L2_1_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(L2_1_gblup_variances_all,"L2_1_gblup_variances_all_1_1000.rds")
+saveRDS(L2_1_gblup_prediction_all,"L2_1_gblup_prediction_all_1_1000.rds")
+saveRDS(L2_1_gfblup_variances_all,"L2_1_gfblup_variances_all_1_1000.rds")
+saveRDS(L2_1_gfblup_prediction_all,"L2_1_gfblup_prediction_all_1_1000.rds")
+saveRDS(L2_1_gfblup_validate_all,"L2_1_gfblup_validate_all_1_1000.rds")
+
+##################################################################################################################################
diff --git a/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_2001_3000.R
new file mode 100644
index 0000000000000000000000000000000000000000..c4e9a6d10e4be76fb2cd26840909a6257fe0a38b
--- /dev/null
+++ b/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_2001_3000.R
@@ -0,0 +1,130 @@
+############################# FUNCTIONS ##############################################
+trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+`%not_in%` <- purrr::negate(`%in%`)
+# Function to check whether package is installed
+is.installed <- function(mypkg)
+{
+ is.element(mypkg, installed.packages()[,1])
+}
+########################################################################################
+
+setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")
+
+#if (!is.installed("backports"))
+#{
+# install.packages("backports")
+#}
+#if (!is.installed("devtools"))
+#{
+# install.packages("devtools")
+#}
+#library("devtools")
+#if (!is.installed("qgg"))
+#{
+# options(devtools.install.args=" --no-multiargs")
+# devtools::install_github("psoerensen/qgg")
+#}
+message("Loading required packages...")
+library(GO.db)
+library(org.At.tair.db)
+#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 +go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers 
+nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
+GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
+rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
+# NOTE(review): G is never assigned in this script; presumably this .Rdata file provides it -- confirm.
+load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+########################################################################################
+# Keep only the GO terms that have at least one marker, then take this job's slice (2001..3000).
+n <- length(go_all_genes_number[go_all_markers_number > 0])
+go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
+GF_filtered <- GF[names(go_all_markers_filtered)]
+rGF_filtered <- rGF[names(go_all_markers_filtered)]
+
+GF_filtered <- GF_filtered[2001:3000]
+rGF_filtered <- rGF_filtered[2001:3000]
+###########################################################################################
+traits <- ncol(pheno_df)
+cycles <- 10
+# Validation sets saved by the GBLUP run; element r is used for cycle r below.
+gblup_validate <- readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
+n_folds <- 8
+
+# One slot per cycle. Every slot starts as an empty list and stays that way when a fit fails.
+L2_1_gblup_variances_all <- rep(list(list()), cycles)
+L2_1_gblup_prediction_all <- rep(list(list()), cycles)
+L2_1_gfblup_variances_all <- rep(list(list()), cycles)
+L2_1_gfblup_prediction_all <- rep(list(list()), cycles)
+L2_1_gfblup_validate_all <- rep(list(list()), cycles)
+
+for (r in seq_len(cycles)) {
+  message(paste("cycle#", r, "..."))
+  # Folds are taken from the stored GBLUP validation sets; no random folds are drawn here.
+  # NOTE(review): if the stored element is a matrix, unlist() flattens it column by column while
+  # byrow = TRUE refills it row by row, so the fold columns may differ from the stored ones.
+  # round() also assumes nrow(pheno_df) is a multiple of n_folds -- confirm against the GBLUP script.
+  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  message("L2_1...")
+  y <- 1000000 * pheno_df$L2_1
+  # NOTE(review): `1*y` puts the response on the right-hand side; presumably an intercept-only
+  # design (y ~ 1) is intended -- confirm before changing.
+  fm <- y ~ 1*y
+  X <- model.matrix(fm)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Fit with the full GRM, without and with the validation sets. Results are stored only when
+  # both fits succeed. On failure the slots keep their empty lists and the error is reported
+  # (assignments made inside an error handler would only touch handler-local copies).
+  tryCatch({
+    gblup_fit <- greml(y = y, X = X, GRM = list(G = G), ncores = 1)
+    gblup_cv <- greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
+    L2_1_gblup_variances_all[[r]] <- gblup_fit
+    L2_1_gblup_prediction_all[[r]] <- gblup_cv
+  }, error = function(e) {
+    message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
+  })
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # One pair of fits per GO term, each using that term's entry of GF_filtered and rGF_filtered.
+  gfblup_var <- rep(list(list()), length(GF_filtered))
+  gfblup_pred <- rep(list(list()), length(GF_filtered))
+  names(gfblup_var) <- names(GF_filtered)
+  names(gfblup_pred) <- names(GF_filtered)
+  for (k in seq_along(GF_filtered)) {
+    message(paste("GO#", k, "..."))
+    tryCatch({
+      gfblup_var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), ncores = 1)
+      gfblup_pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k], rGF_filtered[k]), validate = validate, ncores = 1)
+    }, error = function(e) {
+      # Entries not yet assigned for this term keep their empty-list placeholder.
+      message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
+    })
+  }
+  L2_1_gfblup_variances_all[[r]] <- gfblup_var
+  L2_1_gfblup_prediction_all[[r]] <- gfblup_pred
+  L2_1_gfblup_validate_all[[r]] <- list(validate)
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
+saveRDS(L2_1_gblup_variances_all,"L2_1_gblup_variances_all_2001_3000.rds")
+saveRDS(L2_1_gblup_prediction_all,"L2_1_gblup_prediction_all_2001_3000.rds")
+saveRDS(L2_1_gfblup_variances_all,"L2_1_gfblup_variances_all_2001_3000.rds")
+saveRDS(L2_1_gfblup_prediction_all,"L2_1_gfblup_prediction_all_2001_3000.rds")
+saveRDS(L2_1_gfblup_validate_all,"L2_1_gfblup_validate_all_2001_3000.rds")
+
##################################################################################################################################
# diff --git a/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..a28bd6c2640b982fe4cec3465c1a099e1849ae40 --- /dev/null +++ b/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_3001_4000.R @@ -0,0 +1,130 @@
# Fits GBLUP and, for GO terms 3001-4000 of the filtered set, GFBLUP models
# for trait L2_1 with qgg::greml, then saves variances and predictions as RDS.
############################# FUNCTIONS ##############################################
# Strip leading/trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of %in%, built with purrr::negate().
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# The relative saveRDS() paths at the end of the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
priors_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors"
# Read <priors_dir>/<subdir>/<name>.rds; every input is stored under its variable name.
read_prior <- function(subdir, name) {
  readRDS(file = file.path(priors_dir, subdir, paste0(name, ".rds")))
}
MAC_matrix_with_header <- read_prior("data", "MAC_matrix_with_header")
accessions <- read_prior("data", "accessions")
all_nucl_genes_bed <- read_prior("data", "all_nucl_genes_bed")
ath_all_new_maf_ldpruned_map <- read_prior("data", "ath_all_new_maf_ldpruned_map")
Pheno <- read_prior("data", "Pheno")
geno_pheno <- read_prior("data", "geno_pheno")
MAC_df <- read_prior("data", "MAC_df")
pheno_df <- read_prior("data", "pheno_df")
W <- read_prior("data", "W")
go_all_genes_number <- read_prior("go", "go_all_genes_number") #7432
go_all_markers_number <- read_prior("go", "go_all_markers_number") #7432
go_all_markers <- read_prior("go", "go_all_markers") #7432
# The objects below: 7297 excluding those go terms with zero markers
markerSets <- read_prior("go", "markerSets")
setsGF <- read_prior("go", "setsGF")
rsetsGF <- read_prior("go", "rsetsGF")
nsets <- read_prior("go", "nsets")
nsnps <- read_prior("go", "nsnps")
GF <- read_prior("go", "GF")
rGF <- read_prior("go", "rGF")
# Loaded last, as before. Presumably this supplies the matrix G used below - confirm.
load(file = file.path(priors_dir, "data", "G.Rdata"))
########################################################################################
# Keep only GO terms that have at least one marker, then take this job's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation folds are read from the earlier GBLUP run instead of being re-drawn here.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

L2_1_gblup_variances_all <- rep(list(list()), cycles)
L2_1_gblup_prediction_all <- rep(list(list()), cycles)
L2_1_gfblup_variances_all <- rep(list(list()), cycles)
L2_1_gfblup_prediction_all <- rep(list(list()), cycles)
L2_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Clock-based re-seed kept from the original.
  # NOTE(review): folds now come from disk, so this is probably unnecessary - confirm.
  set.seed(as.numeric(Sys.time()))
  # NOTE(review): byrow = TRUE only reproduces the saved folds if they were
  # flattened row-wise - confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message ("L2_1...")
  y <- 1000000 * pheno_df$L2_1
  # NOTE(review): the response also appears on the right-hand side; `y ~ 1` may
  # have been intended. Left unchanged - verify the resulting design matrix.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # tryCatch returns the fits (or empty placeholders) as a value; assignments made
  # inside an error handler would only touch the handler's local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  L2_1_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  L2_1_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the k-th GO-term matrix plus its counterpart from rGF.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        # If either fit fails, both slots are left empty for this term.
        list(var = list(), pred = list())
      }
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  L2_1_gfblup_variances_all[[r]] <- gf_var
  L2_1_gfblup_prediction_all[[r]] <- gf_pred
  L2_1_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(L2_1_gblup_variances_all, "L2_1_gblup_variances_all_3001_4000.rds")
saveRDS(L2_1_gblup_prediction_all, "L2_1_gblup_prediction_all_3001_4000.rds")
saveRDS(L2_1_gfblup_variances_all, "L2_1_gfblup_variances_all_3001_4000.rds")
saveRDS(L2_1_gfblup_prediction_all, "L2_1_gfblup_prediction_all_3001_4000.rds")
saveRDS(L2_1_gfblup_validate_all, "L2_1_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..e9f2d66e67c387ee244d78b13293d981689336ac --- /dev/null +++ b/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_4001_5000.R @@ -0,0 +1,130 @@
# Same pipeline as the script above, for GO terms 4001-5000 of the filtered set.
############################# FUNCTIONS ##############################################
# Strip leading/trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of %in%, built with purrr::negate().
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# The relative saveRDS() paths at the end of the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
priors_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors"
# Read <priors_dir>/<subdir>/<name>.rds; every input is stored under its variable name.
read_prior <- function(subdir, name) {
  readRDS(file = file.path(priors_dir, subdir, paste0(name, ".rds")))
}
MAC_matrix_with_header <- read_prior("data", "MAC_matrix_with_header")
accessions <- read_prior("data", "accessions")
all_nucl_genes_bed <- read_prior("data", "all_nucl_genes_bed")
ath_all_new_maf_ldpruned_map <- read_prior("data", "ath_all_new_maf_ldpruned_map")
Pheno <- read_prior("data", "Pheno")
geno_pheno <- read_prior("data", "geno_pheno")
MAC_df <- read_prior("data", "MAC_df")
pheno_df <- read_prior("data", "pheno_df")
W <- read_prior("data", "W")
go_all_genes_number <- read_prior("go", "go_all_genes_number") #7432
go_all_markers_number <- read_prior("go", "go_all_markers_number") #7432
go_all_markers <- read_prior("go", "go_all_markers") #7432
# The objects below: 7297 excluding those go terms with zero markers
markerSets <- read_prior("go", "markerSets")
setsGF <- read_prior("go", "setsGF")
rsetsGF <- read_prior("go", "rsetsGF")
nsets <- read_prior("go", "nsets")
nsnps <- read_prior("go", "nsnps")
GF <- read_prior("go", "GF")
rGF <- read_prior("go", "rGF")
# Loaded last, as before. Presumably this supplies the matrix G used below - confirm.
load(file = file.path(priors_dir, "data", "G.Rdata"))
########################################################################################
# Keep only GO terms that have at least one marker, then take this job's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation folds are read from the earlier GBLUP run instead of being re-drawn here.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

L2_1_gblup_variances_all <- rep(list(list()), cycles)
L2_1_gblup_prediction_all <- rep(list(list()), cycles)
L2_1_gfblup_variances_all <- rep(list(list()), cycles)
L2_1_gfblup_prediction_all <- rep(list(list()), cycles)
L2_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Clock-based re-seed kept from the original.
  # NOTE(review): folds now come from disk, so this is probably unnecessary - confirm.
  set.seed(as.numeric(Sys.time()))
  # NOTE(review): byrow = TRUE only reproduces the saved folds if they were
  # flattened row-wise - confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message ("L2_1...")
  y <- 1000000 * pheno_df$L2_1
  # NOTE(review): the response also appears on the right-hand side; `y ~ 1` may
  # have been intended. Left unchanged - verify the resulting design matrix.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # tryCatch returns the fits (or empty placeholders) as a value; assignments made
  # inside an error handler would only touch the handler's local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  L2_1_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  L2_1_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the k-th GO-term matrix plus its counterpart from rGF.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        # If either fit fails, both slots are left empty for this term.
        list(var = list(), pred = list())
      }
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  L2_1_gfblup_variances_all[[r]] <- gf_var
  L2_1_gfblup_prediction_all[[r]] <- gf_pred
  L2_1_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(L2_1_gblup_variances_all, "L2_1_gblup_variances_all_4001_5000.rds")
saveRDS(L2_1_gblup_prediction_all, "L2_1_gblup_prediction_all_4001_5000.rds")
saveRDS(L2_1_gfblup_variances_all, "L2_1_gfblup_variances_all_4001_5000.rds")
saveRDS(L2_1_gfblup_prediction_all, "L2_1_gfblup_prediction_all_4001_5000.rds")
saveRDS(L2_1_gfblup_validate_all, "L2_1_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..edec7a2e31647c2dc26079347f947414d6e6d510 --- /dev/null +++ b/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_5001_6000.R @@ -0,0 +1,130 @@
# Opening of the 5001-6000 script; it continues on the following line of the dump.
############################# FUNCTIONS ##############################################
# Strip leading/trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of %in%, built with purrr::negate().
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
# Continuation of the 5001-6000 script: fits GBLUP and, for GO terms 5001-6000 of
# the filtered set, GFBLUP models for trait L2_1 with qgg::greml, then saves the results.
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
priors_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors"
# Read <priors_dir>/<subdir>/<name>.rds; every input is stored under its variable name.
read_prior <- function(subdir, name) {
  readRDS(file = file.path(priors_dir, subdir, paste0(name, ".rds")))
}
MAC_matrix_with_header <- read_prior("data", "MAC_matrix_with_header")
accessions <- read_prior("data", "accessions")
all_nucl_genes_bed <- read_prior("data", "all_nucl_genes_bed")
ath_all_new_maf_ldpruned_map <- read_prior("data", "ath_all_new_maf_ldpruned_map")
Pheno <- read_prior("data", "Pheno")
geno_pheno <- read_prior("data", "geno_pheno")
MAC_df <- read_prior("data", "MAC_df")
pheno_df <- read_prior("data", "pheno_df")
W <- read_prior("data", "W")
go_all_genes_number <- read_prior("go", "go_all_genes_number") #7432
go_all_markers_number <- read_prior("go", "go_all_markers_number") #7432
go_all_markers <- read_prior("go", "go_all_markers") #7432
# The objects below: 7297 excluding those go terms with zero markers
markerSets <- read_prior("go", "markerSets")
setsGF <- read_prior("go", "setsGF")
rsetsGF <- read_prior("go", "rsetsGF")
nsets <- read_prior("go", "nsets")
nsnps <- read_prior("go", "nsnps")
GF <- read_prior("go", "GF")
rGF <- read_prior("go", "rGF")
# Loaded last, as before. Presumably this supplies the matrix G used below - confirm.
load(file = file.path(priors_dir, "data", "G.Rdata"))
########################################################################################
# Keep only GO terms that have at least one marker, then take this job's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation folds are read from the earlier GBLUP run instead of being re-drawn here.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

L2_1_gblup_variances_all <- rep(list(list()), cycles)
L2_1_gblup_prediction_all <- rep(list(list()), cycles)
L2_1_gfblup_variances_all <- rep(list(list()), cycles)
L2_1_gfblup_prediction_all <- rep(list(list()), cycles)
L2_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Clock-based re-seed kept from the original.
  # NOTE(review): folds now come from disk, so this is probably unnecessary - confirm.
  set.seed(as.numeric(Sys.time()))
  # NOTE(review): byrow = TRUE only reproduces the saved folds if they were
  # flattened row-wise - confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message ("L2_1...")
  y <- 1000000 * pheno_df$L2_1
  # NOTE(review): the response also appears on the right-hand side; `y ~ 1` may
  # have been intended. Left unchanged - verify the resulting design matrix.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # tryCatch returns the fits (or empty placeholders) as a value; assignments made
  # inside an error handler would only touch the handler's local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  L2_1_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  L2_1_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the k-th GO-term matrix plus its counterpart from rGF.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        # If either fit fails, both slots are left empty for this term.
        list(var = list(), pred = list())
      }
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  L2_1_gfblup_variances_all[[r]] <- gf_var
  L2_1_gfblup_prediction_all[[r]] <- gf_pred
  L2_1_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(L2_1_gblup_variances_all, "L2_1_gblup_variances_all_5001_6000.rds")
saveRDS(L2_1_gblup_prediction_all, "L2_1_gblup_prediction_all_5001_6000.rds")
saveRDS(L2_1_gfblup_variances_all, "L2_1_gfblup_variances_all_5001_6000.rds")
saveRDS(L2_1_gfblup_prediction_all, "L2_1_gfblup_prediction_all_5001_6000.rds")
saveRDS(L2_1_gfblup_validate_all, "L2_1_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..903828219fad8a2483f36721a116726406e0d8fc --- /dev/null +++ b/code/using_GO/psii/L2_1_gblup_vs_gfblup_using_go_model_6001_7297.R @@ -0,0 +1,130 @@
# Opening of the 6001-7297 script; it continues on the following line of the dump.
############################# FUNCTIONS ##############################################
# Strip leading/trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of %in%, built with purrr::negate().
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
# Continuation of the 6001-7297 script: fits GBLUP and, for GO terms 6001..n of
# the filtered set, GFBLUP models for trait L2_1 with qgg::greml, then saves the results.
priors_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors"
# Read <priors_dir>/<subdir>/<name>.rds; every input is stored under its variable name.
read_prior <- function(subdir, name) {
  readRDS(file = file.path(priors_dir, subdir, paste0(name, ".rds")))
}
ath_all_new_maf_ldpruned_map <- read_prior("data", "ath_all_new_maf_ldpruned_map")
Pheno <- read_prior("data", "Pheno")
geno_pheno <- read_prior("data", "geno_pheno")
MAC_df <- read_prior("data", "MAC_df")
pheno_df <- read_prior("data", "pheno_df")
W <- read_prior("data", "W")
go_all_genes_number <- read_prior("go", "go_all_genes_number") #7432
go_all_markers_number <- read_prior("go", "go_all_markers_number") #7432
go_all_markers <- read_prior("go", "go_all_markers") #7432
# The objects below: 7297 excluding those go terms with zero markers
markerSets <- read_prior("go", "markerSets")
setsGF <- read_prior("go", "setsGF")
rsetsGF <- read_prior("go", "rsetsGF")
nsets <- read_prior("go", "nsets")
nsnps <- read_prior("go", "nsnps")
GF <- read_prior("go", "GF")
rGF <- read_prior("go", "rGF")
# Loaded last, as before. Presumably this supplies the matrix G used below - confirm.
load(file = file.path(priors_dir, "data", "G.Rdata"))
########################################################################################
# Keep only GO terms that have at least one marker, then take this job's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# Last slice: runs to the end of the filtered set rather than a fixed upper bound.
GF_filtered <- GF_filtered[6001:n]
rGF_filtered <- rGF_filtered[6001:n]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation folds are read from the earlier GBLUP run instead of being re-drawn here.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

L2_1_gblup_variances_all <- rep(list(list()), cycles)
L2_1_gblup_prediction_all <- rep(list(list()), cycles)
L2_1_gfblup_variances_all <- rep(list(list()), cycles)
L2_1_gfblup_prediction_all <- rep(list(list()), cycles)
L2_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Clock-based re-seed kept from the original.
  # NOTE(review): folds now come from disk, so this is probably unnecessary - confirm.
  set.seed(as.numeric(Sys.time()))
  # NOTE(review): byrow = TRUE only reproduces the saved folds if they were
  # flattened row-wise - confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message ("L2_1...")
  y <- 1000000 * pheno_df$L2_1
  # NOTE(review): the response also appears on the right-hand side; `y ~ 1` may
  # have been intended. Left unchanged - verify the resulting design matrix.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # tryCatch returns the fits (or empty placeholders) as a value; assignments made
  # inside an error handler would only touch the handler's local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  L2_1_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  L2_1_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the k-th GO-term matrix plus its counterpart from rGF.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        # If either fit fails, both slots are left empty for this term.
        list(var = list(), pred = list())
      }
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  L2_1_gfblup_variances_all[[r]] <- gf_var
  L2_1_gfblup_prediction_all[[r]] <- gf_pred
  L2_1_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(L2_1_gblup_variances_all, "L2_1_gblup_variances_all_6001_7297.rds")
saveRDS(L2_1_gblup_prediction_all, "L2_1_gblup_prediction_all_6001_7297.rds")
saveRDS(L2_1_gfblup_variances_all, "L2_1_gfblup_variances_all_6001_7297.rds")
saveRDS(L2_1_gfblup_prediction_all, "L2_1_gfblup_prediction_all_6001_7297.rds")
saveRDS(L2_1_gfblup_validate_all, "L2_1_gfblup_validate_all_6001_7297.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..949542d5c1d57b3cfd1f845e5299e11a25042556 --- /dev/null +++ b/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_1001_2000.R @@ -0,0 +1,130 @@
# Fits GBLUP and, for GO terms 1001-2000 of the filtered set, GFBLUP models
# for trait L2_2 with qgg::greml, then saves variances and predictions as RDS.
############################# FUNCTIONS ##############################################
# Strip leading/trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of %in%, built with purrr::negate().
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

# The relative saveRDS() paths at the end of the script resolve against this directory.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library('qgg')
library('dplyr')
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- read_prior("data", "MAC_matrix_with_header")
accessions <- read_prior("data", "accessions")
all_nucl_genes_bed <- read_prior("data", "all_nucl_genes_bed")
ath_all_new_maf_ldpruned_map <- read_prior("data", "ath_all_new_maf_ldpruned_map")
Pheno <- read_prior("data", "Pheno")
geno_pheno <- read_prior("data", "geno_pheno")
MAC_df <- read_prior("data", "MAC_df")
pheno_df <- read_prior("data", "pheno_df")
W <- read_prior("data", "W")
go_all_genes_number <- read_prior("go", "go_all_genes_number") #7432
go_all_markers_number <- read_prior("go", "go_all_markers_number") #7432
go_all_markers <- read_prior("go", "go_all_markers") #7432
# The objects below: 7297 excluding those go terms with zero markers
markerSets <- read_prior("go", "markerSets")
setsGF <- read_prior("go", "setsGF")
rsetsGF <- read_prior("go", "rsetsGF")
nsets <- read_prior("go", "nsets")
nsnps <- read_prior("go", "nsnps")
GF <- read_prior("go", "GF")
rGF <- read_prior("go", "rGF")
# Loaded last, as before. Presumably this supplies the matrix G used below - confirm.
load(file = file.path(priors_dir, "data", "G.Rdata"))
########################################################################################
# Keep only GO terms that have at least one marker, then take this job's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[1001:2000]
rGF_filtered <- rGF_filtered[1001:2000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation folds are read from the earlier GBLUP run instead of being re-drawn here.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

L2_2_gblup_variances_all <- rep(list(list()), cycles)
L2_2_gblup_prediction_all <- rep(list(list()), cycles)
L2_2_gfblup_variances_all <- rep(list(list()), cycles)
L2_2_gfblup_prediction_all <- rep(list(list()), cycles)
L2_2_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # Clock-based re-seed kept from the original.
  # NOTE(review): folds now come from disk, so this is probably unnecessary - confirm.
  set.seed(as.numeric(Sys.time()))
  # NOTE(review): byrow = TRUE only reproduces the saved folds if they were
  # flattened row-wise - confirm against the script that wrote gblup_validate_all.rds.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message ("L2_2...")
  y <- 1000000 * pheno_df$L2_2
  # NOTE(review): the response also appears on the right-hand side; `y ~ 1` may
  # have been intended. Left unchanged - verify the resulting design matrix.
  fm <- y ~ 1*y
  X <- model.matrix(fm)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # tryCatch returns the fits (or empty placeholders) as a value; assignments made
  # inside an error handler would only touch the handler's local copies.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  L2_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  L2_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  gf_pred <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  names(gf_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the k-th GO-term matrix plus its counterpart from rGF.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        # If either fit fails, both slots are left empty for this term.
        list(var = list(), pred = list())
      }
    )
    gf_var[[k]] <- gf_fit[["var"]]
    gf_pred[[k]] <- gf_fit[["pred"]]
  }
  L2_2_gfblup_variances_all[[r]] <- gf_var
  L2_2_gfblup_prediction_all[[r]] <- gf_pred
  L2_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(L2_2_gblup_variances_all, "L2_2_gblup_variances_all_1001_2000.rds")
saveRDS(L2_2_gblup_prediction_all, "L2_2_gblup_prediction_all_1001_2000.rds")
saveRDS(L2_2_gfblup_variances_all, "L2_2_gfblup_variances_all_1001_2000.rds")
saveRDS(L2_2_gfblup_prediction_all, "L2_2_gfblup_prediction_all_1001_2000.rds")
saveRDS(L2_2_gfblup_validate_all, "L2_2_gfblup_validate_all_1001_2000.rds")

##################################################################################################################################
# diff --git a/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..885fa6dd281393861a00317391311e9b72076b7f --- /dev/null +++ b/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_1_1000.R @@ -0,0 +1,130 @@
# Opening of the L2_2 1-1000 script; it continues on the following line of the dump.
############################# FUNCTIONS ##############################################
# Strip leading/trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negation of %in%, built with purrr::negate().
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
# GBLUP vs. GO-term GFBLUP for trait L2_2, GO-term chunk 1:1000.
#
# Inputs : genotype/phenotype tables, the genomic relationship matrix and the
#          per-GO-term relationship matrices (GF / rGF), all read from disk.
# Outputs: five .rds files written to the current working directory
#          (L2_2_*_1_1000.rds), one list entry per cycle.
###########################################################################################
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))

# Author's size notes: 7432 entries for the first three objects, 7297 for the
# rest (GO terms with zero markers excluded).
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
# NOTE(review): presumably this .Rdata file provides the object `G` used below - confirm.
load(file.path(data_dir, "G.Rdata"))
########################################################################################
# Keep only GO terms that have at least one marker, then take this script's chunk.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[1:1000]
rGF_filtered <- rGF_filtered[1:1000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation folds are not re-sampled here: they are rebuilt per cycle from this
# stored object. The time-seeded sample() of the previous version never fed into
# `validate` and has been removed together with the other unused temporaries.
gblup_validate <- readRDS(
  "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds"
)
n_folds <- 8

# Pre-filled with empty lists so that a failed cycle keeps an (empty) slot.
L2_2_gblup_variances_all <- rep(list(list()), cycles)
L2_2_gblup_prediction_all <- rep(list(list()), cycles)
L2_2_gfblup_variances_all <- rep(list(list()), cycles)
L2_2_gfblup_prediction_all <- rep(list(list()), cycles)
L2_2_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not change between cycles, so build them once.
y <- 1000000 * pheno_df$L2_2
# NOTE(review): the response is named on the right-hand side as well; presumably
# an intercept-only design (y ~ 1) was intended - confirm before changing.
fm <- y ~ 1 * y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("L2_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed for the cycle to be recorded (as before); a failure is
  # now reported instead of being swallowed, and the slot stays an empty list.
  gblup_fit <- tryCatch(
    {
      list(
        var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
        pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
      )
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    L2_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    L2_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the GO-term GRM plus its complementary GRM.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch(
      {
        gfblup_var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
        gfblup_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      },
      error = function(e) {
        # The failed entry keeps its pre-filled empty list; only report here.
        message(
          "GFBLUP failed for GO# ", k, " (", names(GF_filtered)[k], ") in cycle ", r, ": ",
          conditionMessage(e)
        )
      }
    )
  }
  L2_2_gfblup_variances_all[[r]] <- gfblup_var
  L2_2_gfblup_prediction_all[[r]] <- gfblup_pred
  L2_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(L2_2_gblup_variances_all, "L2_2_gblup_variances_all_1_1000.rds")
saveRDS(L2_2_gblup_prediction_all, "L2_2_gblup_prediction_all_1_1000.rds")
saveRDS(L2_2_gfblup_variances_all, "L2_2_gfblup_variances_all_1_1000.rds")
saveRDS(L2_2_gfblup_prediction_all, "L2_2_gfblup_prediction_all_1_1000.rds")
saveRDS(L2_2_gfblup_validate_all, "L2_2_gfblup_validate_all_1_1000.rds")
# NOTE(review): the line below is a whitespace-collapsed git-diff fragment, not runnable R as-is.
# In order it holds: the closing banner of the preceding script, the diff header that adds
# code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_2001_3000.R, and the start of that script:
#   - helpers: trim() removes leading/trailing whitespace via gsub(); `%not_in%` is
#     purrr::negate() applied to `%in%`; is.installed() tests membership in installed.packages()[,1]
#   - setwd() to the psii/8fold directory
#   - a commented-out bootstrap that would install backports, devtools and qgg
#   - library() calls for GO.db, org.At.tair.db, qgg and dplyr
#   - readRDS() of MAC_matrix_with_header, accessions and all_nucl_genes_bed
+################################################################################################################################## diff --git a/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..fb5d6b445bf1262e4043d69035c1b55912020167 --- /dev/null +++ b/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_2001_3000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# Remaining inputs of the data-loading section: marker map, phenotype tables and
# the marker matrices.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# GO-term objects; the author's size note for these three is 7432.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# The author's size note for the objects below is 7297 (GO terms with zero
# markers excluded).
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# Restores whatever objects were saved in G.Rdata into the current environment.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
# NOTE(review): the three lines below are a whitespace-collapsed git-diff fragment, not runnable R as-is.
# Part 1 - analysis section of L2_2_gblup_vs_gfblup_using_go_model_2001_3000.R:
#   - keeps the GO terms whose go_all_markers_number is > 0, then takes entries 2001:3000 of GF / rGF
#   - for each of 10 cycles: rebuilds `validate` from the stored gblup_validate[[r]], fits greml() with
#     the single GRM G (GBLUP), then for every selected GO term fits greml() with the pair
#     c(GF_filtered[k], rGF_filtered[k]) (GFBLUP); each model is fitted once without and once with `validate`
#   - saves the five per-cycle result lists as L2_2_*_2001_3000.rds
# NOTE(review): a, set.seed(a), folds_index, x and the for(i ...) loop (its body is commented out) are
#   not referenced again in the visible code.
# NOTE(review): the assignments inside the error = function(e) handlers run in the handler's own frame,
#   so they do not modify the outer lists; a failed fit simply keeps the pre-filled list() and nothing is logged.
# NOTE(review): `fm <- y ~ 1*y` names the response on the right-hand side; presumably an intercept-only
#   design was intended - confirm.
# Part 2 - diff header adding L2_2_gblup_vs_gfblup_using_go_model_3001_4000.R, then that script's helper
#   definitions, setwd(), package loading and readRDS() calls up to go_all_genes_number.
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +L2_2_gblup_variances_all=rep(list(list()),cycles) +L2_2_gblup_prediction_all=rep(list(list()),cycles) +L2_2_gfblup_variances_all=rep(list(list()),cycles) +L2_2_gfblup_prediction_all=rep(list(list()),cycles) +L2_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L2_2...") + y=1000000*pheno_df$L2_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L2_2_gblup_variances_all[[r]]<-var + L2_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
L2_2_gblup_variances_all[[r]]<-list() + L2_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + L2_2_gfblup_variances_all[[r]]<-var + L2_2_gfblup_prediction_all[[r]]<-pred + L2_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L2_2_gblup_variances_all,"L2_2_gblup_variances_all_2001_3000.rds") +saveRDS(L2_2_gblup_prediction_all,"L2_2_gblup_prediction_all_2001_3000.rds") +saveRDS(L2_2_gfblup_variances_all,"L2_2_gfblup_variances_all_2001_3000.rds") +saveRDS(L2_2_gfblup_prediction_all,"L2_2_gfblup_prediction_all_2001_3000.rds") +saveRDS(L2_2_gfblup_validate_all,"L2_2_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..e3e06119fe767e21b9791c58bc0b11ad4883e9e5 --- /dev/null +++ b/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_3001_4000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim 
<- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
# NOTE(review): the three lines below are a whitespace-collapsed git-diff fragment, not runnable R as-is.
# Part 1 - remainder of L2_2_gblup_vs_gfblup_using_go_model_3001_4000.R:
#   - readRDS() of the GO objects (go_all_markers_number ... rGF) and load() of G.Rdata
#   - keeps the GO terms whose go_all_markers_number is > 0, then takes entries 3001:4000 of GF / rGF
#   - for each of 10 cycles: rebuilds `validate` from the stored gblup_validate[[r]], fits greml() with
#     the single GRM G (GBLUP), then for every selected GO term fits greml() with the pair
#     c(GF_filtered[k], rGF_filtered[k]) (GFBLUP); each model is fitted once without and once with `validate`
#   - saves the five per-cycle result lists as L2_2_*_3001_4000.rds
# NOTE(review): a, set.seed(a), folds_index, x and the for(i ...) loop (its body is commented out) are
#   not referenced again in the visible code.
# NOTE(review): the assignments inside the error = function(e) handlers run in the handler's own frame,
#   so they do not modify the outer lists; a failed fit simply keeps the pre-filled list() and nothing is logged.
# NOTE(review): `fm <- y ~ 1*y` names the response on the right-hand side; presumably an intercept-only
#   design was intended - confirm.
# Part 2 - diff header adding L2_2_gblup_vs_gfblup_using_go_model_4001_5000.R, then that script's helper
#   definitions, setwd(), the commented-out installer and library(GO.db).
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[3001:4000] +rGF_filtered<-rGF_filtered[3001:4000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds 
<- 8 + +L2_2_gblup_variances_all=rep(list(list()),cycles) +L2_2_gblup_prediction_all=rep(list(list()),cycles) +L2_2_gfblup_variances_all=rep(list(list()),cycles) +L2_2_gfblup_prediction_all=rep(list(list()),cycles) +L2_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L2_2...") + y=1000000*pheno_df$L2_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L2_2_gblup_variances_all[[r]]<-var + L2_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + L2_2_gblup_variances_all[[r]]<-list() + L2_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup 
error") + }) #try catch ends + } + L2_2_gfblup_variances_all[[r]]<-var + L2_2_gfblup_prediction_all[[r]]<-pred + L2_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L2_2_gblup_variances_all,"L2_2_gblup_variances_all_3001_4000.rds") +saveRDS(L2_2_gblup_prediction_all,"L2_2_gblup_prediction_all_3001_4000.rds") +saveRDS(L2_2_gfblup_variances_all,"L2_2_gfblup_variances_all_3001_4000.rds") +saveRDS(L2_2_gfblup_prediction_all,"L2_2_gfblup_prediction_all_3001_4000.rds") +saveRDS(L2_2_gfblup_validate_all,"L2_2_gfblup_validate_all_3001_4000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_4001_5000.R new file mode 100644 index 0000000000000000000000000000000000000000..e3d3e93b28ae8522ed5364728144638579362afd --- /dev/null +++ b/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_4001_5000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
library(org.At.tair.db)
# columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
# GBLUP vs. GO-term GFBLUP for trait L2_2, GO-term chunk 4001:5000.
#
# Inputs : genotype/phenotype tables, the genomic relationship matrix and the
#          per-GO-term relationship matrices (GF / rGF), all read from disk.
# Outputs: five .rds files written to the current working directory
#          (L2_2_*_4001_5000.rds), one list entry per cycle.
###########################################################################################
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

MAC_matrix_with_header <- readRDS(file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
Pheno <- readRDS(file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file.path(data_dir, "pheno_df.rds"))
W <- readRDS(file.path(data_dir, "W.rds"))

# Author's size notes: 7432 entries for the first three objects, 7297 for the
# rest (GO terms with zero markers excluded).
go_all_genes_number <- readRDS(file.path(go_dir, "go_all_genes_number.rds"))
go_all_markers_number <- readRDS(file.path(go_dir, "go_all_markers_number.rds"))
go_all_markers <- readRDS(file.path(go_dir, "go_all_markers.rds"))
markerSets <- readRDS(file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file.path(go_dir, "GF.rds"))
rGF <- readRDS(file.path(go_dir, "rGF.rds"))
# NOTE(review): presumably this .Rdata file provides the object `G` used below - confirm.
load(file.path(data_dir, "G.Rdata"))
########################################################################################
# Keep only GO terms that have at least one marker, then take this script's chunk.
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]
###########################################################################################
traits <- ncol(pheno_df)
cycles <- 10
# Validation folds are not re-sampled here: they are rebuilt per cycle from this
# stored object. The time-seeded sample() of the previous version never fed into
# `validate` and has been removed together with the other unused temporaries.
gblup_validate <- readRDS(
  "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds"
)
n_folds <- 8

# Pre-filled with empty lists so that a failed cycle keeps an (empty) slot.
L2_2_gblup_variances_all <- rep(list(list()), cycles)
L2_2_gblup_prediction_all <- rep(list(list()), cycles)
L2_2_gfblup_variances_all <- rep(list(list()), cycles)
L2_2_gfblup_prediction_all <- rep(list(list()), cycles)
L2_2_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not change between cycles, so build them once.
y <- 1000000 * pheno_df$L2_2
# NOTE(review): the response is named on the right-hand side as well; presumably
# an intercept-only design (y ~ 1) was intended - confirm before changing.
fm <- y ~ 1 * y
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    nrow = round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("L2_2...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed for the cycle to be recorded (as before); a failure is
  # now reported instead of being swallowed, and the slot stays an empty list.
  gblup_fit <- tryCatch(
    {
      list(
        var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
        pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
      )
    },
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    L2_2_gblup_variances_all[[r]] <- gblup_fit[["var"]]
    L2_2_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gfblup_var <- rep(list(list()), length(GF_filtered))
  gfblup_pred <- rep(list(list()), length(GF_filtered))
  names(gfblup_var) <- names(GF_filtered)
  names(gfblup_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two-component model: the GO-term GRM plus its complementary GRM.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])
    tryCatch(
      {
        gfblup_var[[k]] <- greml(y = y, X = X, GRM = grm_k, ncores = 1)
        gfblup_pred[[k]] <- greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      },
      error = function(e) {
        # The failed entry keeps its pre-filled empty list; only report here.
        message(
          "GFBLUP failed for GO# ", k, " (", names(GF_filtered)[k], ") in cycle ", r, ": ",
          conditionMessage(e)
        )
      }
    )
  }
  L2_2_gfblup_variances_all[[r]] <- gfblup_var
  L2_2_gfblup_prediction_all[[r]] <- gfblup_pred
  L2_2_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(L2_2_gblup_variances_all, "L2_2_gblup_variances_all_4001_5000.rds")
saveRDS(L2_2_gblup_prediction_all, "L2_2_gblup_prediction_all_4001_5000.rds")
saveRDS(L2_2_gfblup_variances_all, "L2_2_gfblup_variances_all_4001_5000.rds")
saveRDS(L2_2_gfblup_prediction_all, "L2_2_gfblup_prediction_all_4001_5000.rds")
saveRDS(L2_2_gfblup_validate_all, "L2_2_gfblup_validate_all_4001_5000.rds")
# NOTE(review): the line below is a whitespace-collapsed git-diff fragment, not runnable R as-is.
# In order it holds: the closing banner of the preceding script, the diff header that adds
# code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_5001_6000.R, and the start of that script:
#   - helpers: trim() removes leading/trailing whitespace via gsub(); `%not_in%` is
#     purrr::negate() applied to `%in%`; is.installed() tests membership in installed.packages()[,1]
#   - setwd() to the psii/8fold directory
#   - a commented-out bootstrap that would install backports, devtools and qgg
#   - library() calls for GO.db, org.At.tair.db, qgg and dplyr
#   - readRDS() of MAC_matrix_with_header, accessions and all_nucl_genes_bed
+################################################################################################################################## diff --git a/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_5001_6000.R new file mode 100644 index 0000000000000000000000000000000000000000..8e7589287ba80fdcb27c535031b28037d4f46518 --- /dev/null +++ b/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_5001_6000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# Remaining inputs of the data-loading section: marker map, phenotype tables and
# the marker matrices.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# GO-term objects; the author's size note for these three is 7432.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")

# The author's size note for the objects below is 7297 (GO terms with zero
# markers excluded).
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")

# Restores whatever objects were saved in G.Rdata into the current environment.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[5001:6000] +rGF_filtered<-rGF_filtered[5001:6000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +L2_2_gblup_variances_all=rep(list(list()),cycles) +L2_2_gblup_prediction_all=rep(list(list()),cycles) +L2_2_gfblup_variances_all=rep(list(list()),cycles) +L2_2_gfblup_prediction_all=rep(list(list()),cycles) +L2_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L2_2...") + y=1000000*pheno_df$L2_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L2_2_gblup_variances_all[[r]]<-var + L2_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
L2_2_gblup_variances_all[[r]]<-list() + L2_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + L2_2_gfblup_variances_all[[r]]<-var + L2_2_gfblup_prediction_all[[r]]<-pred + L2_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L2_2_gblup_variances_all,"L2_2_gblup_variances_all_5001_6000.rds") +saveRDS(L2_2_gblup_prediction_all,"L2_2_gblup_prediction_all_5001_6000.rds") +saveRDS(L2_2_gfblup_variances_all,"L2_2_gfblup_variances_all_5001_6000.rds") +saveRDS(L2_2_gfblup_prediction_all,"L2_2_gfblup_prediction_all_5001_6000.rds") +saveRDS(L2_2_gfblup_validate_all,"L2_2_gfblup_validate_all_5001_6000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_6001_7297.R new file mode 100644 index 0000000000000000000000000000000000000000..d50a9c28db98143e2ca71bcd9e62b7bb497cb813 --- /dev/null +++ b/code/using_GO/psii/L2_2_gblup_vs_gfblup_using_go_model_6001_7297.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim 
<- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
# ---- GO-derived inputs stored under priors/go ----
# The trailing numbers are the original author's notes on object sizes.
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers

# load() restores every object saved in G.Rdata under its saved name
# (presumably `G` -- TODO confirm).
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")

# ---- Keep only GO terms that have at least one marker ----
# `n` counts those terms; GF / rGF are then indexed by the surviving names.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

# This script handles the last slice of terms: positions 6001 up to n.
GF_filtered <- GF_filtered[6001:n]
rGF_filtered <- rGF_filtered[6001:n]

# ---- Run configuration ----
traits <- ncol(pheno_df)
cycles <- 10
# Fold assignments are read from the file written by the GBLUP run; element r
# is reshaped into the validation matrix for cycle r further down:
#gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE);
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8
+ +L2_2_gblup_variances_all=rep(list(list()),cycles) +L2_2_gblup_prediction_all=rep(list(list()),cycles) +L2_2_gfblup_variances_all=rep(list(list()),cycles) +L2_2_gfblup_prediction_all=rep(list(list()),cycles) +L2_2_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L2_2...") + y=1000000*pheno_df$L2_2 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L2_2_gblup_variances_all[[r]]<-var + L2_2_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + L2_2_gblup_variances_all[[r]]<-list() + L2_2_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + 
}) #try catch ends + } + L2_2_gfblup_variances_all[[r]]<-var + L2_2_gfblup_prediction_all[[r]]<-pred + L2_2_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L2_2_gblup_variances_all,"L2_2_gblup_variances_all_6001_7297.rds") +saveRDS(L2_2_gblup_prediction_all,"L2_2_gblup_prediction_all_6001_7297.rds") +saveRDS(L2_2_gfblup_variances_all,"L2_2_gfblup_variances_all_6001_7297.rds") +saveRDS(L2_2_gfblup_prediction_all,"L2_2_gfblup_prediction_all_6001_7297.rds") +saveRDS(L2_2_gfblup_validate_all,"L2_2_gfblup_validate_all_6001_7297.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_1001_2000.R b/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_1001_2000.R new file mode 100644 index 0000000000000000000000000000000000000000..89d2e39ab8c8dd951ebd5acdf35a110d03b2b2da --- /dev/null +++ b/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_1001_2000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
library(org.At.tair.db)
#columns(org.At.tair.db)
library(qgg)
library(dplyr)

# =============================================================================
# GBLUP vs GFBLUP for trait L2_3, GO terms 1001-2000.
#
# For each of `cycles` repetitions this script
#   1. fits a single-GRM model with greml() (stored as "gblup"), with and
#      without the validation folds, and
#   2. fits, for every GO term in the slice, a two-GRM model built from
#      GF_filtered[k] and rGF_filtered[k] (stored as "gfblup").
# Fold assignments are NOT drawn here: they are read from
# gblup_validate_all.rds so every cycle reuses the folds saved by the GBLUP
# run. Results are written as .rds files into the working directory.
# =============================================================================

# ---- Load inputs ------------------------------------------------------------
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers
# load() restores the objects saved in G.Rdata; the GBLUP fit below needs `G`.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")

# ---- Keep GO terms with at least one marker, then take this script's slice ---
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[1001:2000]
rGF_filtered <- rGF_filtered[1001:2000]

# ---- Run configuration ------------------------------------------------------
traits <- ncol(pheno_df)
cycles <- 10
gblup_validate <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

# One fold set per cycle is required; fail up front instead of mid-run.
stopifnot(length(gblup_validate) >= cycles)

# Result containers: one slot per cycle, pre-filled with empty lists so a
# failed fit is recorded as list().
L2_3_gblup_variances_all <- rep(list(list()), cycles)
L2_3_gblup_prediction_all <- rep(list(list()), cycles)
L2_3_gfblup_variances_all <- rep(list(list()), cycles)
L2_3_gfblup_prediction_all <- rep(list(list()), cycles)
L2_3_gfblup_validate_all <- rep(list(list()), cycles)

# seq_len() / seq_along() instead of 1:n so an empty range runs zero times.
for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))

  # Validation folds for this cycle, reshaped to round(nrow / n_folds) rows
  # filled row-wise from the saved fold set.
  validate <- matrix(
    unlist(gblup_validate[[r]]),
    round(nrow(pheno_df) / n_folds),
    byrow = TRUE
  )

  message("L2_3...")
  y <- 1000000 * pheno_df$L2_3
  # NOTE(review): the formula is kept exactly as written; it looks intended to
  # yield an intercept-only design matrix -- confirm what model.matrix returns.
  fm <- y ~ 1*y
  X <- model.matrix(fm)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits are returned from tryCatch() as one value and stored afterwards.
  # Assigning to the result lists inside the error handler would only modify
  # a copy local to the handler function.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      list(var = list(), pred = list())
    }
  )
  L2_3_gblup_variances_all[[r]] <- gblup_fit[["var"]]
  L2_3_gblup_prediction_all[[r]] <- gblup_fit[["pred"]]

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  var <- rep(list(list()), length(GF_filtered))
  pred <- rep(list(list()), length(GF_filtered))
  names(var) <- names(GF_filtered)
  names(pred) <- names(GF_filtered)

  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices for term k: its GF entry plus the matching
    # rGF entry.
    grm_k <- c(GF_filtered[k], rGF_filtered[k])

    # If either fit fails, both slots become list(), so a variance fit is
    # never stored without its prediction.
    fit_k <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_k, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_k, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed in cycle ", r, ", GO# ", k, ": ", conditionMessage(e))
        list(var = list(), pred = list())
      }
    )
    var[[k]] <- fit_k[["var"]]
    pred[[k]] <- fit_k[["pred"]]
  }

  L2_3_gfblup_variances_all[[r]] <- var
  L2_3_gfblup_prediction_all[[r]] <- pred
  L2_3_gfblup_validate_all[[r]] <- list(validate)
}

# ---- Persist results (relative to the working directory) ----------------------
saveRDS(L2_3_gblup_variances_all, "L2_3_gblup_variances_all_1001_2000.rds")
saveRDS(L2_3_gblup_prediction_all, "L2_3_gblup_prediction_all_1001_2000.rds")
saveRDS(L2_3_gfblup_variances_all, "L2_3_gfblup_variances_all_1001_2000.rds")
saveRDS(L2_3_gfblup_prediction_all, "L2_3_gfblup_prediction_all_1001_2000.rds")
saveRDS(L2_3_gfblup_validate_all, "L2_3_gfblup_validate_all_1001_2000.rds")
+################################################################################################################################## diff --git a/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_1_1000.R b/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_1_1000.R new file mode 100644 index 0000000000000000000000000000000000000000..db84ba07ebea7ed91d643829ad5cde591bbe763a --- /dev/null +++ b/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_1_1000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") 
# ---- Cached inputs stored under priors/data ----
# Each object is restored from its own .rds file and keeps the name used by
# the rest of the script.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")

# ---- GO-derived inputs stored under priors/go ----
# The trailing numbers are the original author's notes on object sizes.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers

# load() restores every object saved in G.Rdata under its saved name
# (presumably `G`, which the GBLUP fit below refers to -- TODO confirm).
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
+######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[1:1000] +rGF_filtered<-rGF_filtered[1:1000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds <- 8 + +L2_3_gblup_variances_all=rep(list(list()),cycles) +L2_3_gblup_prediction_all=rep(list(list()),cycles) +L2_3_gfblup_variances_all=rep(list(list()),cycles) +L2_3_gfblup_prediction_all=rep(list(list()),cycles) +L2_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L2_3...") + y=1000000*pheno_df$L2_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L2_3_gblup_variances_all[[r]]<-var + L2_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + 
L2_3_gblup_variances_all[[r]]<-list() + L2_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup error") + }) #try catch ends + } + L2_3_gfblup_variances_all[[r]]<-var + L2_3_gfblup_prediction_all[[r]]<-pred + L2_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L2_3_gblup_variances_all,"L2_3_gblup_variances_all_1_1000.rds") +saveRDS(L2_3_gblup_prediction_all,"L2_3_gblup_prediction_all_1_1000.rds") +saveRDS(L2_3_gfblup_variances_all,"L2_3_gfblup_variances_all_1_1000.rds") +saveRDS(L2_3_gfblup_prediction_all,"L2_3_gfblup_prediction_all_1_1000.rds") +saveRDS(L2_3_gfblup_validate_all,"L2_3_gfblup_validate_all_1_1000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_2001_3000.R b/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_2001_3000.R new file mode 100644 index 0000000000000000000000000000000000000000..0a3b8bdfe6b726090b148890af384afce751e398 --- /dev/null +++ b/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_2001_3000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function 
(x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) +library(org.At.tair.db) +#columns(org.At.tair.db) +library('qgg') +library('dplyr') +########################################################################################### +message("Loading data...") +MAC_matrix_with_header=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds") +accessions=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds") +all_nucl_genes_bed=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds") +ath_all_new_maf_ldpruned_map=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds") +Pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds") +geno_pheno=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds") +MAC_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds") +pheno_df=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds") +W=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds") +go_all_genes_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") #7432 
+go_all_markers_number=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") #7432 +go_all_markers=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") #7432 +markerSets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds") #7297 excluding those go terms with zero markers +setsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds") #7297 excluding those go terms with zero markers +rsetsGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds") #7297 excluding those go terms with zero markers +nsets=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds") #7297 excluding those go terms with zero markers +nsnps=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds") #7297 excluding those go terms with zero markers +GF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds") #7297 excluding those go terms with zero markers +rGF=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds") #7297 excluding those go terms with zero markers +load(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata") +######################################################################################## +n=length(go_all_genes_number[go_all_markers_number>0]) +go_all_markers_filtered<-go_all_markers[go_all_markers_number>0] +GF_filtered<-GF[names(go_all_markers_filtered)] +rGF_filtered<-rGF[names(go_all_markers_filtered)] + +GF_filtered<-GF_filtered[2001:3000] +rGF_filtered<-rGF_filtered[2001:3000] +########################################################################################### +traits=ncol(pheno_df) +cycles=10; #gblup_validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); + gblup_validate=readRDS(file="/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds") +n_folds 
<- 8 + +L2_3_gblup_variances_all=rep(list(list()),cycles) +L2_3_gblup_prediction_all=rep(list(list()),cycles) +L2_3_gfblup_variances_all=rep(list(list()),cycles) +L2_3_gfblup_prediction_all=rep(list(list()),cycles) +L2_3_gfblup_validate_all=rep(list(list()),cycles) + +for(r in 1:cycles) +{ + message(paste("cycle#",r,"...")) + a=as.numeric(Sys.time()) + set.seed(a) #generate random folds sets everytime + folds_index <- sample(rep(1:n_folds, length.out = nrow(pheno_df))) + validate=matrix(unlist(gblup_validate[[r]]),round(nrow(pheno_df)/n_folds),byrow=TRUE); #validate=matrix(nrow=round(nrow(pheno_df)/n_folds),ncol=n_folds) + for(i in c(1:n_folds)) + { + #validate[,i]=which(folds_index==i) + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + message ("L2_3...") + y=1000000*pheno_df$L2_3 + fm <- y ~ 1*y + X <- model.matrix(fm) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + result = tryCatch({ + var <- greml(y = y, X = X, GRM = list(G=G),ncores=1) + pred <- greml(y = y, X = X, GRM = list(G=G), validate = validate,ncores=1) + L2_3_gblup_variances_all[[r]]<-var + L2_3_gblup_prediction_all[[r]]<-pred + }, error = function(e) { + L2_3_gblup_variances_all[[r]]<-list() + L2_3_gblup_prediction_all[[r]]<-list() + }) #try catch ends + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var <-rep(list(list()),length(GF_filtered)) + pred <-rep(list(list()),length(GF_filtered)) + names(var)<-(names(GF_filtered)) + names(pred)<-(names(GF_filtered)) + for(k in c(1:length(GF_filtered))) + { + message(paste("GO#",k,"...")) + x=(r*k)+((r-1)*(length(GF_filtered)-k)) + result = tryCatch({ + var[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]),ncores=1) + pred[[k]] <- greml(y = y, X = X, GRM = c(GF_filtered[k],rGF_filtered[k]), validate = validate,ncores=1) + }, error = function(e) { + var[[k]]<-list() + pred[[k]]<-list() + #print("gfblup 
error") + }) #try catch ends + } + L2_3_gfblup_variances_all[[r]]<-var + L2_3_gfblup_prediction_all[[r]]<-pred + L2_3_gfblup_validate_all[[r]]<-list(validate) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} +saveRDS(L2_3_gblup_variances_all,"L2_3_gblup_variances_all_2001_3000.rds") +saveRDS(L2_3_gblup_prediction_all,"L2_3_gblup_prediction_all_2001_3000.rds") +saveRDS(L2_3_gfblup_variances_all,"L2_3_gfblup_variances_all_2001_3000.rds") +saveRDS(L2_3_gfblup_prediction_all,"L2_3_gfblup_prediction_all_2001_3000.rds") +saveRDS(L2_3_gfblup_validate_all,"L2_3_gfblup_validate_all_2001_3000.rds") + +################################################################################################################################## diff --git a/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_3001_4000.R b/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_3001_4000.R new file mode 100644 index 0000000000000000000000000000000000000000..767d1aec33ee3627dd54b34f27909572d6b0722d --- /dev/null +++ b/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_3001_4000.R @@ -0,0 +1,130 @@ +############################# FUNCTIONS ############################################## +trim <- function (x) gsub("^\\s+|\\s+$", "", x) +`%not_in%` <- purrr::negate(`%in%`) +# Function to check whether package is installed +is.installed <- function(mypkg) +{ + is.element(mypkg, installed.packages()[,1]) +} +######################################################################################## + +setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/") + +#if (!is.installed("backports")) +#{ +# install.packages("backports") +#} +#if (!is.installed("devtools")) +#{ +# install.packages("devtools") +#} +#library("devtools") +#if (!is.installed("qgg")) +#{ +# options(devtools.install.args=" --no-multiargs") +# devtools::install_github("psoerensen/qgg") +#} +message("Loading required packages...") +library(GO.db) 
# GBLUP vs. GFBLUP cross-validation for trait L2_3, GO feature sets 3001-4000.
# Continuation of the script: the helper functions, setwd() and library(GO.db)
# come before this part.
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# GO-level summaries: 7432 entries each.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# Per-GO-term objects: 7297 entries each (GO terms with zero markers excluded).
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
# Presumably defines `G`, which is passed to greml() below -- confirm the contents of G.Rdata.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep only GO terms that have at least one marker, then take this script's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[3001:4000]
rGF_filtered <- rGF_filtered[3001:4000]
###########################################################################################
cycles <- 10
# Validation folds are read from the stand-alone GBLUP run instead of being
# re-sampled here; the former set.seed()/sample() fold generation was dead code
# (its result was never used) and has been removed.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

L2_3_gblup_variances_all <- rep(list(list()), cycles)
L2_3_gblup_prediction_all <- rep(list(list()), cycles)
L2_3_gfblup_variances_all <- rep(list(list()), cycles)
L2_3_gfblup_prediction_all <- rep(list(list()), cycles)
L2_3_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not change between cycles, so build them once.
# The previous formula `y ~ 1*y` put the response on the right-hand side, which
# model.matrix() drops with warnings; `y ~ 1` states the intercept-only model directly.
y <- 1000000 * pheno_df$L2_3
fm <- y ~ 1
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): byrow = TRUE only reproduces the GBLUP folds if they were stored
  # row-wise; a matrix saved via list(validate) (as done below) unlists column-wise -- confirm.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("L2_3...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed for the cycle to be recorded; otherwise the slots keep
  # their empty-list placeholders. Failures are reported instead of being swallowed.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    L2_3_gblup_variances_all[[r]] <- gblup_fit$var
    L2_3_gblup_prediction_all[[r]] <- gblup_fit$pred
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  gf_pred <- gf_var
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the GO feature set and its complement.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # Handler assignments inside function(e) never reached the outer lists, so a term
    # could keep a variance fit without a prediction. Results are now stored only
    # when both fits succeed, which is what the original handler tried to enforce.
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    if (!is.null(gf_fit)) {
      gf_var[[k]] <- gf_fit$var
      gf_pred[[k]] <- gf_fit$pred
    }
  }
  L2_3_gfblup_variances_all[[r]] <- gf_var
  L2_3_gfblup_prediction_all[[r]] <- gf_pred
  L2_3_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(L2_3_gblup_variances_all, "L2_3_gblup_variances_all_3001_4000.rds")
saveRDS(L2_3_gblup_prediction_all, "L2_3_gblup_prediction_all_3001_4000.rds")
saveRDS(L2_3_gfblup_variances_all, "L2_3_gfblup_variances_all_3001_4000.rds")
saveRDS(L2_3_gfblup_prediction_all, "L2_3_gfblup_prediction_all_3001_4000.rds")
saveRDS(L2_3_gfblup_validate_all, "L2_3_gfblup_validate_all_3001_4000.rds")

##################################################################################################################################
# ---- patch boundary: git diff header of the next file, kept verbatim ----
# diff --git a/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_4001_5000.R b/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_4001_5000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..57d2e15d88d0bb93a0d7485b895c3524cf55801f
# --- /dev/null
# +++ b/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_4001_5000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated %in%, built with purrr::negate().
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed
is.installed <- function(mypkg) {
  installed_names <- installed.packages()[, 1]
  is.element(mypkg, installed_names)
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# One-time setup, left disabled: install "backports" and "devtools" when missing,
# then install qgg with devtools::install_github("psoerensen/qgg").
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
# Remaining inputs for the 4001-5000 run: genotype/phenotype objects first,
# then the GO-derived objects.
ath_all_new_maf_ldpruned_map <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# GO-level summaries: 7432 entries each.
go_all_genes_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# Per-GO-term objects: 7297 entries each (GO terms with zero markers excluded).
markerSets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
# Restores whatever objects G.Rdata holds into the workspace; presumably the
# genomic relationship matrix `G` used by the GBLUP fit -- confirm.
load("/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# GBLUP vs. GFBLUP cross-validation for trait L2_3, GO feature sets 4001-5000.
# Keep only GO terms that have at least one marker, then take this script's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[4001:5000]
rGF_filtered <- rGF_filtered[4001:5000]
###########################################################################################
cycles <- 10
# Validation folds are read from the stand-alone GBLUP run instead of being
# re-sampled here; the former set.seed()/sample() fold generation was dead code
# (its result was never used) and has been removed.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

L2_3_gblup_variances_all <- rep(list(list()), cycles)
L2_3_gblup_prediction_all <- rep(list(list()), cycles)
L2_3_gfblup_variances_all <- rep(list(list()), cycles)
L2_3_gfblup_prediction_all <- rep(list(list()), cycles)
L2_3_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not change between cycles, so build them once.
# The previous formula `y ~ 1*y` put the response on the right-hand side, which
# model.matrix() drops with warnings; `y ~ 1` states the intercept-only model directly.
y <- 1000000 * pheno_df$L2_3
fm <- y ~ 1
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): byrow = TRUE only reproduces the GBLUP folds if they were stored
  # row-wise; a matrix saved via list(validate) (as done below) unlists column-wise -- confirm.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("L2_3...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed for the cycle to be recorded; otherwise the slots keep
  # their empty-list placeholders. Failures are reported instead of being swallowed.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    L2_3_gblup_variances_all[[r]] <- gblup_fit$var
    L2_3_gblup_prediction_all[[r]] <- gblup_fit$pred
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  gf_pred <- gf_var
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the GO feature set and its complement.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # Handler assignments inside function(e) never reached the outer lists, so a term
    # could keep a variance fit without a prediction. Results are now stored only
    # when both fits succeed, which is what the original handler tried to enforce.
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    if (!is.null(gf_fit)) {
      gf_var[[k]] <- gf_fit$var
      gf_pred[[k]] <- gf_fit$pred
    }
  }
  L2_3_gfblup_variances_all[[r]] <- gf_var
  L2_3_gfblup_prediction_all[[r]] <- gf_pred
  L2_3_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(L2_3_gblup_variances_all, "L2_3_gblup_variances_all_4001_5000.rds")
saveRDS(L2_3_gblup_prediction_all, "L2_3_gblup_prediction_all_4001_5000.rds")
saveRDS(L2_3_gfblup_variances_all, "L2_3_gfblup_variances_all_4001_5000.rds")
saveRDS(L2_3_gfblup_prediction_all, "L2_3_gfblup_prediction_all_4001_5000.rds")
saveRDS(L2_3_gfblup_validate_all, "L2_3_gfblup_validate_all_4001_5000.rds")

##################################################################################################################################
# ---- patch boundary: git diff header of the next file, kept verbatim ----
# diff --git a/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_5001_6000.R b/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_5001_6000.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..09348abb2ec7c45d68170a2c3da1d1d39c24b4e0
# --- /dev/null
# +++ b/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_5001_6000.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated %in%, built with purrr::negate().
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# One-time setup, left disabled: install "backports" and "devtools" when missing,
# then install qgg with devtools::install_github("psoerensen/qgg").
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
# GBLUP vs. GFBLUP cross-validation for trait L2_3, GO feature sets 5001-6000
# (remaining data loads, then the cross-validation itself).
# GO-level summaries: 7432 entries each.
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# Per-GO-term objects: 7297 entries each (GO terms with zero markers excluded).
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
# Presumably defines `G`, which is passed to greml() below -- confirm the contents of G.Rdata.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep only GO terms that have at least one marker, then take this script's slice.
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[5001:6000]
rGF_filtered <- rGF_filtered[5001:6000]
###########################################################################################
cycles <- 10
# Validation folds are read from the stand-alone GBLUP run instead of being
# re-sampled here; the former set.seed()/sample() fold generation was dead code
# (its result was never used) and has been removed.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

L2_3_gblup_variances_all <- rep(list(list()), cycles)
L2_3_gblup_prediction_all <- rep(list(list()), cycles)
L2_3_gfblup_variances_all <- rep(list(list()), cycles)
L2_3_gfblup_prediction_all <- rep(list(list()), cycles)
L2_3_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not change between cycles, so build them once.
# The previous formula `y ~ 1*y` put the response on the right-hand side, which
# model.matrix() drops with warnings; `y ~ 1` states the intercept-only model directly.
y <- 1000000 * pheno_df$L2_3
fm <- y ~ 1
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): byrow = TRUE only reproduces the GBLUP folds if they were stored
  # row-wise; a matrix saved via list(validate) (as done below) unlists column-wise -- confirm.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("L2_3...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed for the cycle to be recorded; otherwise the slots keep
  # their empty-list placeholders. Failures are reported instead of being swallowed.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    L2_3_gblup_variances_all[[r]] <- gblup_fit$var
    L2_3_gblup_prediction_all[[r]] <- gblup_fit$pred
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  gf_pred <- gf_var
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the GO feature set and its complement.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # Handler assignments inside function(e) never reached the outer lists, so a term
    # could keep a variance fit without a prediction. Results are now stored only
    # when both fits succeed, which is what the original handler tried to enforce.
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    if (!is.null(gf_fit)) {
      gf_var[[k]] <- gf_fit$var
      gf_pred[[k]] <- gf_fit$pred
    }
  }
  L2_3_gfblup_variances_all[[r]] <- gf_var
  L2_3_gfblup_prediction_all[[r]] <- gf_pred
  L2_3_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(L2_3_gblup_variances_all, "L2_3_gblup_variances_all_5001_6000.rds")
saveRDS(L2_3_gblup_prediction_all, "L2_3_gblup_prediction_all_5001_6000.rds")
saveRDS(L2_3_gfblup_variances_all, "L2_3_gfblup_variances_all_5001_6000.rds")
saveRDS(L2_3_gfblup_prediction_all, "L2_3_gfblup_prediction_all_5001_6000.rds")
saveRDS(L2_3_gfblup_validate_all, "L2_3_gfblup_validate_all_5001_6000.rds")

##################################################################################################################################
# ---- patch boundary: git diff header of the next file, kept verbatim ----
# diff --git a/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_6001_7297.R b/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_6001_7297.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..598290d6067e3cf3d15de09a33c7ee4055d38af9
# --- /dev/null
# +++ b/code/using_GO/psii/L2_3_gblup_vs_gfblup_using_go_model_6001_7297.R
# @@ -0,0 +1,130 @@
############################# FUNCTIONS ##############################################
# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Negated %in%, built with purrr::negate().
`%not_in%` <- purrr::negate(`%in%`)

# Function to check whether package is installed
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
########################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

# One-time setup, left disabled: install "backports" and "devtools" when missing,
# then install qgg with devtools::install_github("psoerensen/qgg").
message("Loading required packages...")
library(GO.db)
# GBLUP vs. GFBLUP cross-validation for trait L2_3, GO feature sets 6001-n
# (n = number of GO terms with at least one marker).
# Continuation of the script: the helper functions, setwd() and library(GO.db)
# come before this part.
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
###########################################################################################
message("Loading data...")
MAC_matrix_with_header <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds")
accessions <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds")
all_nucl_genes_bed <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds")
ath_all_new_maf_ldpruned_map <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds")
Pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/Pheno.rds")
geno_pheno <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/geno_pheno.rds")
MAC_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds")
pheno_df <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/pheno_df.rds")
W <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds")
# GO-level summaries: 7432 entries each.
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds")
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds")
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds")
# Per-GO-term objects: 7297 entries each (GO terms with zero markers excluded).
markerSets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/markerSets.rds")
setsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/setsGF.rds")
rsetsGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rsetsGF.rds")
nsets <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsets.rds")
nsnps <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/nsnps.rds")
GF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/GF.rds")
rGF <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/rGF.rds")
# Presumably defines `G`, which is passed to greml() below -- confirm the contents of G.Rdata.
load(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/G.Rdata")
########################################################################################
# Keep only GO terms that have at least one marker, then take this script's slice
# (the last one, running to the end of the filtered list).
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]

GF_filtered <- GF_filtered[6001:n]
rGF_filtered <- rGF_filtered[6001:n]
###########################################################################################
cycles <- 10
# Validation folds are read from the stand-alone GBLUP run instead of being
# re-sampled here; the former set.seed()/sample() fold generation was dead code
# (its result was never used) and has been removed.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")
n_folds <- 8

L2_3_gblup_variances_all <- rep(list(list()), cycles)
L2_3_gblup_prediction_all <- rep(list(list()), cycles)
L2_3_gfblup_variances_all <- rep(list(list()), cycles)
L2_3_gfblup_prediction_all <- rep(list(list()), cycles)
L2_3_gfblup_validate_all <- rep(list(list()), cycles)

# Response and design matrix do not change between cycles, so build them once.
# The previous formula `y ~ 1*y` put the response on the right-hand side, which
# model.matrix() drops with warnings; `y ~ 1` states the intercept-only model directly.
y <- 1000000 * pheno_df$L2_3
fm <- y ~ 1
X <- model.matrix(fm)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): byrow = TRUE only reproduces the GBLUP folds if they were stored
  # row-wise; a matrix saved via list(validate) (as done below) unlists column-wise -- confirm.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("L2_3...")
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed for the cycle to be recorded; otherwise the slots keep
  # their empty-list placeholders. Failures are reported instead of being swallowed.
  gblup_fit <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup_fit)) {
    L2_3_gblup_variances_all[[r]] <- gblup_fit$var
    L2_3_gblup_prediction_all[[r]] <- gblup_fit$pred
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~GFBLUP~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gf_var <- rep(list(list()), length(GF_filtered))
  names(gf_var) <- names(GF_filtered)
  gf_pred <- gf_var
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    # Two relationship matrices per model: the GO feature set and its complement.
    grm_pair <- c(GF_filtered[k], rGF_filtered[k])
    # Handler assignments inside function(e) never reached the outer lists, so a term
    # could keep a variance fit without a prediction. Results are now stored only
    # when both fits succeed, which is what the original handler tried to enforce.
    gf_fit <- tryCatch(
      list(
        var = greml(y = y, X = X, GRM = grm_pair, ncores = 1),
        pred = greml(y = y, X = X, GRM = grm_pair, validate = validate, ncores = 1)
      ),
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
        NULL
      }
    )
    if (!is.null(gf_fit)) {
      gf_var[[k]] <- gf_fit$var
      gf_pred[[k]] <- gf_fit$pred
    }
  }
  L2_3_gfblup_variances_all[[r]] <- gf_var
  L2_3_gfblup_prediction_all[[r]] <- gf_pred
  L2_3_gfblup_validate_all[[r]] <- list(validate)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(L2_3_gblup_variances_all, "L2_3_gblup_variances_all_6001_7297.rds")
saveRDS(L2_3_gblup_prediction_all, "L2_3_gblup_prediction_all_6001_7297.rds")
saveRDS(L2_3_gfblup_variances_all, "L2_3_gfblup_variances_all_6001_7297.rds")
saveRDS(L2_3_gfblup_prediction_all, "L2_3_gfblup_prediction_all_6001_7297.rds")
saveRDS(L2_3_gfblup_validate_all, "L2_3_gfblup_validate_all_6001_7297.rds")

##################################################################################################################################
# ---- patch boundary: git diff header of the next file, kept verbatim ----
# diff --git a/code/using_GO/psii/combine_cv_results.R b/code/using_GO/psii/combine_cv_results.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..d57138a961b9004273ccc37f00b2711140528d05
# --- /dev/null
# +++ b/code/using_GO/psii/combine_cv_results.R
# @@ -0,0 +1,27 @@
# Stack the per-trait "<trait>_all_go_cv_results.rds" tables from the 8fold
# directory into one table and save it one directory up.
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold")

# Trait order fixes the row order of the combined table.
trait_ids <- c(
  "L1_1", "L1_2", "L1_3",
  "L2_1", "L2_2", "L2_3",
  "H1_1", "H1_2", "H1_3",
  "H2_1", "H2_2", "H2_3",
  "H3_1", "H3_2", "H3_3",
  "H4_1", "H4_2", "H4_3"
)
# One table per trait, read in a loop instead of 18 copy-pasted readRDS() calls.
# The list is left unnamed so rbind() does not prefix row names with list names.
per_trait_results <- lapply(paste0(trait_ids, "_all_go_cv_results.rds"), readRDS)

# The former empty data.frame preallocation was overwritten immediately and is dropped.
psii_all_go_cv_results <- do.call(rbind, per_trait_results)
# NOTE(review): if these tables carry p-value columns (the sibling analysis script
# builds ones named "weltch_t_test_pvalue" and "wilcox"), replacing NA with 0 makes
# missing tests look maximally significant -- confirm this is intended.
psii_all_go_cv_results[is.na(psii_all_go_cv_results)] <- 0


setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/")
saveRDS(psii_all_go_cv_results, "psii_all_go_cv_results.rds")
# ---- patch boundary: git diff header of the next file, kept verbatim ----
# diff --git a/code/using_GO/psii/gblup_vs_gfblup_using_go_analysis1.R b/code/using_GO/psii/gblup_vs_gfblup_using_go_analysis1.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..e02a7325a37d1dab8162dc2a948f4f9d286a3abf
# --- /dev/null
# +++ b/code/using_GO/psii/gblup_vs_gfblup_using_go_analysis1.R
# @@ -0,0 +1,151 @@
# This analysis uses the one-step GFBLUP model to find the GO terms whose
# predictive ability is significantly higher than that of the GBLUP model.
################################################################################
#                                A N A L Y S I S
################################################################################
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")
library(GO.db) # Term() is required below, so fail fast if the package is missing

L2_2_gblup_variances_all <- readRDS(file = "./L2_2_gblup_variances_all.rds")
L2_2_gblup_prediction_all <- readRDS(file = "./L2_2_gblup_prediction_all.rds")
L2_2_gfblup_variances_all <- readRDS(file = "./L2_2_gfblup_variances_all.rds")
L2_2_gfblup_prediction_all <- readRDS(file = "./L2_2_gfblup_prediction_all.rds")
go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
################################################################################
cycles <- length(L2_2_gfblup_prediction_all)
has_markers <- go_all_markers_number > 0
n <- length(go_all_genes_number[has_markers])
go_all_markers_filtered <- go_all_markers[has_markers]
################################################################################

# One empty slot per retained GO term, named by GO id.
empty_per_go <- function() {
  setNames(
    rep(list(list()), length(go_all_markers_filtered)),
    names(go_all_markers_filtered)
  )
}

# First element of x, or NA when x is empty/NULL, so that a missing estimate
# becomes an NA cell instead of a zero-length replacement error.
first_or_na <- function(x) {
  if (length(x) == 0) NA_real_ else x[[1]]
}

gb_pred <- list()
gb_var_G <- list()
gb_var_E <- list()
gb_llik <- NA_real_
gfb_pred <- empty_per_go()
gfb_sigma_gf <- empty_per_go()
gfb_sigma_rgf <- empty_per_go()
gfb_sigma_e <- empty_per_go()
gfb_llik <- empty_per_go()

for (r in seq_len(cycles)) {
  gblup_fit <- L2_2_gblup_variances_all[[r]]
  gblup_cv <- L2_2_gblup_prediction_all[[r]]

  # Keeps the log-likelihood of the last cycle that has one; a failed cycle
  # (empty list) no longer wipes the value.
  if (length(gblup_fit$llik) > 0) {
    gb_llik <- gblup_fit$llik
  }
  if (length(gblup_cv$accuracy$Corr) > 0) {
    gb_pred <- c(gb_pred, as.list(gblup_cv$accuracy$Corr))
    share <- gblup_fit$theta / sum(gblup_fit$theta)
    gb_var_G <- c(gb_var_G, as.list(share["G"]))
    gb_var_E <- c(gb_var_E, as.list(share["E"]))
  }

  go_cv <- L2_2_gfblup_prediction_all[[r]]
  go_fit <- L2_2_gfblup_variances_all[[r]]

  for (i in seq_len(n)) {
    if (length(go_cv[[i]]$accuracy$Corr) > 0) {
      gfb_pred[[i]] <- c(gfb_pred[[i]], as.list(go_cv[[i]]$accuracy$Corr))
    }
    if (length(go_fit[[i]]$theta) > 0) {
      # NOTE(review): accuracies are pooled over all cycles, but these variance
      # shares and the log-likelihood are overwritten each cycle, so only the
      # last successful cycle is reported -- confirm this is intended.
      share <- go_fit[[i]]$theta / sum(go_fit[[i]]$theta)
      gfb_sigma_gf[[i]] <- share[1]
      gfb_sigma_rgf[[i]] <- share[2]
      gfb_sigma_e[[i]] <- share[3]
      # Assigning NULL with [[<- would delete the list slot, hence first_or_na().
      gfb_llik[[i]] <- first_or_na(go_fit[[i]]$llik)
    }
  }
}

gblup_var_G <- mean(unname(unlist(gb_var_G)))
gblup_var_E <- mean(unname(unlist(gb_var_E)))
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Compare predictive ability based on all cycles, each having k folds.
gb <- as.vector(unlist(unname(gb_pred)))
L2_2_all <- data.frame(matrix(nrow = n, ncol = 15))
colnames(L2_2_all) <- c("goid", "weltch_t_test_pvalue", "gblup_mean", "gblup_stdev", "gblup_sigma_g", "gblup_sigma_e", "gfblup_mean", "gfblup_stdev", "gf", "rgf", "e", "gblup_llik", "gfblup_llik", "LR", "wilcox")

# Rows of GO terms without any GBLUP or GFBLUP accuracies stay all-NA.
for (i in seq_len(n)) {
  gfb <- as.vector(unlist(unname(gfb_pred[i])))
  if (length(gb) > 0 && length(gfb) > 0) {
    welch <- t.test(gb, gfb)
    ranks <- wilcox.test(gb, gfb)
    go_llik <- first_or_na(gfb_llik[[i]])
    L2_2_all[i, 1] <- names(gfb_pred[i])
    L2_2_all[i, 2] <- welch$p.value # Welch t-test p-value
    L2_2_all[i, 3] <- welch$estimate[1] # mean of GBLUP
    L2_2_all[i, 4] <- sd(gb, na.rm = TRUE) # SD of GBLUP
    L2_2_all[i, 5] <- gblup_var_G # G variance share of GBLUP
    L2_2_all[i, 6] <- gblup_var_E # E variance share of GBLUP
    L2_2_all[i, 7] <- welch$estimate[2] # mean of GFBLUP
    L2_2_all[i, 8] <- sd(gfb, na.rm = TRUE) # SD of GFBLUP
    L2_2_all[i, 9] <- first_or_na(gfb_sigma_gf[[i]]) # variance share of this GO term
    L2_2_all[i, 10] <- first_or_na(gfb_sigma_rgf[[i]])
    L2_2_all[i, 11] <- first_or_na(gfb_sigma_e[[i]])
    L2_2_all[i, 12] <- gb_llik
    L2_2_all[i, 13] <- go_llik
    L2_2_all[i, 14] <- 2 * (go_llik - gb_llik)
    L2_2_all[i, 15] <- ranks$p.value
  }
}

L2_2_all_important <- L2_2_all[which(L2_2_all$gfblup_mean > L2_2_all$gblup_mean & L2_2_all$weltch_t_test_pvalue < 0.05 & L2_2_all$wilcox < 0.05), ]

if (nrow(L2_2_all_important) > 0) {
  # Align the per-GO counts with the selected rows by GO id.
  pos <- match(L2_2_all_important$goid, names(go_all_markers))
  # The three extra columns are named explicitly; the previous 17-name
  # colnames() call omitted "wilcox" and shifted every later label by one.
  L2_2_all_important <- data.frame(
    L2_2_all_important,
    genes_number = unname(unlist(go_all_genes_number[pos])),
    markers_number = unname(unlist(go_all_markers_number[pos])),
    ontology = unlist(unname(as.list(Term(L2_2_all_important$goid))))
  )
}
write.table(L2_2_all, file = "L2_2_gblup_vs_gfblup_all_using_go_analysis1.tbl", sep = "\t", dec = ".", row.names = TRUE, col.names = TRUE)
saveRDS(L2_2_all, "L2_2_all_analysis1.rds")
write.table(L2_2_all_important, file = "L2_2_gblup_vs_gfblup_all_using_go_important_analysis1.tbl", sep = "\t", dec = ".", row.names = TRUE, col.names = TRUE)
saveRDS(L2_2_all_important, "L2_2_all_important_analysis1.rds")
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
################################################################################
# DIFFERENCE in RANDOM VS BIOLOGY DRIVEN SUBSET OF MARKERS
# (This doesn't seem to help a lot, so there is no need to show this plot.)
################################################################################
# Select by GO id: the table's row names are plain row indices, never GO ids.
goid_important <- L2_2_all_important$goid
go_all_markers_filtered_important <- go_all_markers_filtered[names(go_all_markers_filtered) %in% goid_important]
gfb_pred_important <- gfb_pred[names(gfb_pred) %in% goid_important]

snp_set_size <- c(50, 100, 200, 1000)
random_gfb_pred <- setNames(
  rep(list(list()), length(snp_set_size)),
  as.character(snp_set_size)
)
for (j in seq_along(snp_set_size)) {
  random_L2_2_gfblup_prediction_all <- readRDS(file = paste0("/mnt/LTR_userdata/faroo002/arabidopsis/GP/GFBLUP/random_snp_sets/L2_2_gfblup_prediction_all_", snp_set_size[j], ".rds"))
  for (r in seq_along(random_L2_2_gfblup_prediction_all)) {
    corr <- random_L2_2_gfblup_prediction_all[[r]]$accuracy$Corr
    if (length(corr) > 0) {
      random_gfb_pred[[j]] <- c(random_gfb_pred[[j]], as.list(corr))
    }
  }
}

# One box per random SNP-set size followed by one per important GO term,
# however many terms were selected (previously hard-coded to exactly five).
box_groups <- lapply(
  c(random_gfb_pred, gfb_pred_important),
  function(acc) as.numeric(unlist(acc))
)
boxplot(
  box_groups,
  xlab = "Gene ontology vs random SNP sampling",
  ylab = "Predictive ability",
  names = c(snp_set_size, names(gfb_pred_important)),
  main = "Predictive ability using random SNP subsets vs GO based subsets"
)
################################################################################
# diff --git a/code/using_GO/psii/gblup_vs_gfblup_using_go_model.R
# b/code/using_GO/psii/gblup_vs_gfblup_using_go_model.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..a758dbb227fe021c6f9cc1e46084a23ed9ebc234
# --- /dev/null
# +++ b/code/using_GO/psii/gblup_vs_gfblup_using_go_model.R
# @@ -0,0 +1,126 @@
############################# FUNCTIONS ########################################
# Strip leading and trailing whitespace from each element of x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# Negated %in%.
`%not_in%` <- purrr::negate(`%in%`)
# Function to check whether a package is installed.
is.installed <- function(mypkg) {
  is.element(mypkg, installed.packages()[, 1])
}
################################################################################

setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")

#if (!is.installed("backports"))
#{
#  install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
#  install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
#  options(devtools.install.args=" --no-multiargs")
#  devtools::install_github("psoerensen/qgg")
#}
message("Loading required packages...")
library(GO.db)
library(org.At.tair.db)
#columns(org.At.tair.db)
library("qgg")
library("dplyr")
################################################################################
message("Loading data...")
data_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data"
go_dir <- "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go"

MAC_matrix_with_header <- readRDS(file = file.path(data_dir, "MAC_matrix_with_header.rds"))
accessions <- readRDS(file = file.path(data_dir, "accessions.rds"))
all_nucl_genes_bed <- readRDS(file = file.path(data_dir, "all_nucl_genes_bed.rds"))
ath_all_new_maf_ldpruned_map <- readRDS(file = file.path(data_dir, "ath_all_new_maf_ldpruned_map.rds"))
W <- readRDS(file = file.path(data_dir, "W.rds"))
load(file = file.path(data_dir, "G.Rdata")) # presumably defines G, used as the GBLUP GRM below
Pheno <- readRDS(file = file.path(data_dir, "Pheno.rds"))
geno_pheno <- readRDS(file = file.path(data_dir, "geno_pheno.rds"))
MAC_df <- readRDS(file = file.path(data_dir, "MAC_df.rds"))
pheno_df <- readRDS(file = file.path(data_dir, "pheno_df.rds"))
go_all_genes_number <- readRDS(file = file.path(go_dir, "go_all_genes_number.rds")) # 7432
go_all_markers_number <- readRDS(file = file.path(go_dir, "go_all_markers_number.rds")) # 7432
go_all_markers <- readRDS(file = file.path(go_dir, "go_all_markers.rds")) # 7432
# The objects below hold 7297 entries: GO terms with zero markers are excluded.
markerSets <- readRDS(file = file.path(go_dir, "markerSets.rds"))
setsGF <- readRDS(file = file.path(go_dir, "setsGF.rds"))
rsetsGF <- readRDS(file = file.path(go_dir, "rsetsGF.rds"))
nsets <- readRDS(file = file.path(go_dir, "nsets.rds"))
nsnps <- readRDS(file = file.path(go_dir, "nsnps.rds"))
GF <- readRDS(file = file.path(go_dir, "GF.rds"))
rGF <- readRDS(file = file.path(go_dir, "rGF.rds"))
################################################################################
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
GF_filtered <- GF[names(go_all_markers_filtered)]
rGF_filtered <- rGF[names(go_all_markers_filtered)]
################################################################################
cycles <- 10
n_folds <- 8
# Validation folds are reused from the stand-alone GBLUP run instead of being
# sampled here, so the unused time-seeded fold sampling has been removed.
gblup_validate <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/GBLUP/psii/8fold/gblup_validate_all.rds")

L1_1_gblup_variances_all <- rep(list(list()), cycles)
L1_1_gblup_prediction_all <- rep(list(list()), cycles)
L1_1_gfblup_variances_all <- rep(list(list()), cycles)
L1_1_gfblup_prediction_all <- rep(list(list()), cycles)
L1_1_gfblup_validate_all <- rep(list(list()), cycles)

for (r in seq_len(cycles)) {
  message(paste("cycle#", r, "..."))
  # NOTE(review): if the stored fold matrix was filled column-wise (one fold
  # per column), refilling it with byrow = TRUE regroups the indices into
  # different folds than the GBLUP run used -- confirm against the script that
  # wrote gblup_validate_all.rds.
  validate <- matrix(unlist(gblup_validate[[r]]), round(nrow(pheno_df) / n_folds), byrow = TRUE)
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  message("L1_1...")
  y <- 1000000 * pheno_df$L1_1
  # NOTE(review): the response also appears on the right-hand side; presumably
  # an intercept-only design (y ~ 1) is intended -- confirm.
  fm <- y ~ 1 * y
  X <- model.matrix(fm)
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Both fits must succeed for either to be stored; on failure the cycle keeps
  # its preallocated empty lists and the error is reported, not swallowed.
  gblup <- tryCatch(
    list(
      var = greml(y = y, X = X, GRM = list(G = G), ncores = 1),
      pred = greml(y = y, X = X, GRM = list(G = G), validate = validate, ncores = 1)
    ),
    error = function(e) {
      message("GBLUP failed in cycle ", r, ": ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(gblup)) {
    L1_1_gblup_variances_all[[r]] <- gblup$var
    L1_1_gblup_prediction_all[[r]] <- gblup$pred
  }
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GFBLUP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  go_var <- rep(list(list()), length(GF_filtered))
  go_pred <- rep(list(list()), length(GF_filtered))
  names(go_var) <- names(GF_filtered)
  names(go_pred) <- names(GF_filtered)
  for (k in seq_along(GF_filtered)) {
    message(paste("GO#", k, "..."))
    grm <- c(GF_filtered[k], rGF_filtered[k])
    # A failed fit leaves the preallocated empty list in place. Assignments
    # made inside an error handler would only touch a local copy, so the
    # handler just reports the failure.
    tryCatch(
      {
        go_var[[k]] <- greml(y = y, X = X, GRM = grm, ncores = 1)
        go_pred[[k]] <- greml(y = y, X = X, GRM = grm, validate = validate, ncores = 1)
      },
      error = function(e) {
        message("GFBLUP failed for GO# ", k, " in cycle ", r, ": ", conditionMessage(e))
      }
    )
  }
  L1_1_gfblup_variances_all[[r]] <- go_var
  L1_1_gfblup_prediction_all[[r]] <- go_pred
  L1_1_gfblup_validate_all[[r]] <- list(validate)
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
saveRDS(L1_1_gblup_variances_all, "L1_1_gblup_variances_all.rds")
saveRDS(L1_1_gblup_prediction_all, "L1_1_gblup_prediction_all.rds")
saveRDS(L1_1_gfblup_variances_all, "L1_1_gfblup_variances_all.rds")
saveRDS(L1_1_gfblup_prediction_all, "L1_1_gfblup_prediction_all.rds")
saveRDS(L1_1_gfblup_validate_all, "L1_1_gfblup_validate_all.rds")

################################################################################
# diff --git a/code/using_GO/psii/psii_all_go_cv_results1.R b/code/using_GO/psii/psii_all_go_cv_results1.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..7e3c3d8a88259ca6eb244db940fca106ac1350a6
# --- /dev/null
# +++ b/code/using_GO/psii/psii_all_go_cv_results1.R
# @@ -0,0 +1,79 @@
#!/usr/bin/env Rscript
# Usage: Rscript psii_all_go_cv_results1.R <trait>; the first trailing argument
# is used as the file-name prefix of the RDS files read and written.
args <- commandArgs(trailingOnly = TRUE)
################################################################################
#                                A N A L Y S I S
################################################################################
# Kept as require(): a missing GO.db only warns here and does not stop the run.
require(GO.db)

go_all_genes_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_genes_number.rds") # 7432
go_all_markers_number <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers_number.rds") # 7432
go_all_markers <- readRDS(file = "/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/go/go_all_markers.rds") # 7432
################################################################################
#                                  PSII GO
################################################################################
setwd("/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/using_GO/psii/8fold/")
################################################################################
cycles <- 10
n_folds <- 8
n <- length(go_all_genes_number[go_all_markers_number > 0])
go_all_markers_filtered <- go_all_markers[go_all_markers_number > 0]
################################################################################
# Zero-row template; it is what gets saved only if no rows are produced at all.
# NOTE(review): rbind() drops zero-row data frames, so these column names never
# reached the saved table -- its columns are named after the vectors bound in
# the loop (gb_pred, gb_var_G, ...). Those names are preserved here because
# downstream scripts may rely on them; confirm which set is intended.
res <- data.frame(matrix(nrow = 0, ncol = 10))
colnames(res) <- c("time", "gblup_acc", "gblup_G", "gblup_E", "goid", "gfblup_acc", "gfblup_Gf", "gfblup_rGf", "gfblup_E", "gfblup_h2f")

if (length(args) < 1) {
  stop("Usage: Rscript psii_all_go_cv_results1.R <trait>, e.g. L1_1", call. = FALSE)
}
trait <- args[1]
message(trait)
gblup_prediction_all <- readRDS(file = paste0(trait, "_gblup_prediction_all.rds"))
gfblup_prediction_all <- readRDS(file = paste0(trait, "_gfblup_prediction_all.rds"))
time <- rep(trait, n_folds)

# For each row of theta (coerced to a matrix), the share of the row total held
# by the column at position `component`.
variance_share <- function(theta, component) {
  m <- as.matrix(theta)
  apply(m, 1, function(arg) {
    arg[component] / sum(arg)
  })
}

# Collect one data frame per (cycle, GO term) and bind them once at the end;
# growing `res` with rbind() inside the loop copied the whole table each time.
pieces <- vector("list", cycles * n)
filled <- 0L

for (r in seq_len(cycles)) {
  message(paste("cycle", r))
  gblup_fit <- gblup_prediction_all[[r]]
  per_go <- gfblup_prediction_all[[r]]

  # The GBLUP columns depend only on the cycle, so compute them once per cycle.
  if (length(gblup_fit) > 0) {
    gb_pred <- c(gblup_fit$accuracy$Corr)
    gb_var_G <- variance_share(gblup_fit$theta, 1)
    gb_var_E <- variance_share(gblup_fit$theta, 2)
  } else {
    # A failed cycle is stored as an empty list; report it as zeros.
    gb_pred <- rep(0, n_folds)
    gb_var_G <- rep(0, n_folds)
    gb_var_E <- rep(0, n_folds)
  }

  for (i in seq_len(n)) {
    message(paste("GO#", i))
    goid <- rep(names(per_go)[i], n_folds)
    go_fit <- per_go[[i]]

    if (length(go_fit) > 0) {
      gfb_pred <- c(go_fit$accuracy$Corr)
      gfb_var_Gf <- variance_share(go_fit$theta, 1)
      gfb_var_rGf <- variance_share(go_fit$theta, 2)
      gfb_var_E <- variance_share(go_fit$theta, 3)
      gfb_h2f <- gfb_var_Gf / (gfb_var_Gf + gfb_var_rGf + gfb_var_E)
    } else {
      gfb_pred <- rep(0, n_folds)
      gfb_var_Gf <- rep(0, n_folds)
      gfb_var_rGf <- rep(0, n_folds)
      gfb_h2f <- rep(0, n_folds)
      gfb_var_E <- rep(0, n_folds)
    }

    filled <- filled + 1L
    pieces[[filled]] <- data.frame(time, gb_pred, gb_var_G, gb_var_E, goid, gfb_pred, gfb_var_Gf, gfb_var_rGf, gfb_var_E, gfb_h2f)
  }
}

if (filled > 0L) {
  res <- do.call(rbind, pieces)
}
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
saveRDS(res, paste0(trait, "_all_go_cv_results.rds"))
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# \ No newline at end of file
# diff --git a/code/using_GO/psii/test.R b/code/using_GO/psii/test.R
# new file mode 100644
# index 0000000000000000000000000000000000000000..af469f206bcd6926ccb4a5f303d29dc624d08e5d
# --- /dev/null
# +++ b/code/using_GO/psii/test.R
# @@ -0,0 +1 @@
# Print the first command-line argument. `args` must be defined first:
# otherwise the name resolves to base::args(), which cannot be subset.
args <- commandArgs(trailingOnly = TRUE)
message(args[1])
# \ No newline at end of file