Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
pub1
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Farooq, Muhammad
pub1
Commits
42e3804c
Commit
42e3804c
authored
4 years ago
by
Farooq, Muhammad
Browse files
Options
Downloads
Patches
Plain Diff
uploaded
parent
78adac1c
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
code/make_coex_features_all.R
+11
-38
11 additions, 38 deletions
code/make_coex_features_all.R
code/make_go_features_all.R
+135
-0
135 additions, 0 deletions
code/make_go_features_all.R
with
146 additions
and
38 deletions
code/make_coex_features_all.R
+
11
−
38
View file @
42e3804c
...
...
@@ -8,27 +8,21 @@ is.installed <- function(mypkg)
is.element
(
mypkg
,
installed.packages
()[,
1
])
}
########################################################################################
setwd
(
"
/mnt/LTR_userdata/faroo002/arabidopsis/GP
/priors/coexpression"
)
setwd
(
"
../data
/priors/coexpression"
)
library
(
'dplyr'
)
library
(
'qgg'
)
########################################################################################
MAC_matrix_with_header
=
readRDS
(
file
=
"/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_matrix_with_header.rds"
)
accessions
=
readRDS
(
file
=
"/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/accessions.rds"
)
all_nucl_genes_bed
=
readRDS
(
file
=
"/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/all_nucl_genes_bed.rds"
)
ath_all_new_maf_ldpruned_map
=
readRDS
(
file
=
"/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/ath_all_new_maf_ldpruned_map.rds"
)
MAC_df
=
readRDS
(
file
=
"/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/MAC_df.rds"
)
W
=
readRDS
(
file
=
"/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/W.rds"
)
all_coexpression_clusters
<-
as.data.frame
(
read.table
(
file
=
"/mnt/LTR_userdata/faroo002/arabidopsis/GP/GBLUP_vs_GFBLUP/based_on_coexpression/ath_gene_clusters.txt"
,
header
=
TRUE
,
sep
=
""
,
strip.white
=
TRUE
))
all_coexpression_clusters
<-
as.data.frame
(
read.table
(
file
=
"/mnt/LTR_userdata/faroo002/arabidopsis/GP/GBLUP_vs_GFBLUP/based_on_coexpression/ath_gene_clusters.txt"
,
header
=
TRUE
,
sep
=
""
,
strip.white
=
TRUE
))
coex_markers_number
=
readRDS
(
file
=
"/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/coexpression/coex_markers_number.rds"
)
#7432
coex_markers
=
readRDS
(
file
=
"/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/coexpression/coex_markers.rds"
)
#7432
markerSets
=
readRDS
(
file
=
"/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/coexpression/markerSets.rds"
)
MAC_matrix_with_header
=
readRDS
(
file
=
"../../MAC_matrix_with_header.rds"
)
accessions
=
readRDS
(
file
=
"../../accessions.rds"
)
all_nucl_genes_bed
=
readRDS
(
file
=
"../../all_nucl_genes_bed.rds"
)
ath_all_new_maf_ldpruned_map
=
readRDS
(
file
=
"../../ath_all_new_maf_ldpruned_map.rds"
)
MAC_df
=
readRDS
(
file
=
"../../MAC_df.rds"
)
W
=
readRDS
(
file
=
"../../W.rds"
)
all_coexpression_clusters
<-
as.data.frame
(
read.table
(
file
=
"ath_gene_clusters.txt"
,
header
=
TRUE
,
sep
=
""
,
strip.white
=
TRUE
))
coex_markers_number
=
readRDS
(
file
=
"coex_markers_number.rds"
)
#7432
coex_markers
=
readRDS
(
file
=
"coex_markers.rds"
)
#7432
markerSets
=
readRDS
(
file
=
"markerSets.rds"
)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
regmap_W
=
readRDS
(
file
=
"/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/regmap_W.rds"
)
#This is based on only regmap dataset using call_method_75_TAIR9_maf001_ldpruned
regmap_G
=
readRDS
(
file
=
"/mnt/LTR_userdata/faroo002/arabidopsis/GP/priors/data/regmap_G.rds"
)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
psii_filtered_coex_df_unique
=
read.table
(
file
=
"/mnt/LTR_userdata/faroo002/arabidopsis/GP/using_conda/psii_filtered_coex_df_unique.tbl"
,
header
=
T
,
sep
=
"\t"
)
psii_filtered_clusterids
=
as.character
(
psii_filtered_coex_df_unique
$
clusterid
)
###########################################################################################
message
(
"Retrieving markers for each feature"
)
if
(
!
exists
(
"coex_markers"
))
...
...
@@ -100,24 +94,3 @@ saveRDS(rGF,"./rGF.rds")
}
########################################################################################
#Making GRM from only Regmap dataset without flipping
message
(
"Making Genomic Relationship Matrices..."
)
markerSets
=
markerSets
[
which
(
names
(
markerSets
)
%in%
psii_filtered_clusterids
)]
regmap_setsGF
<-
lapply
(
markerSets
,
function
(
x
){
x
[
x
%in%
colnames
(
regmap_W
)]
})
regmap_rsetsGF
<-
lapply
(
markerSets
,
function
(
x
){
colnames
(
regmap_W
)[
colnames
(
regmap_W
)
%not_in%
x
]
})
#remaining markers for each feature GO term
regmap_nsets
<-
length
(
regmap_setsGF
)
regmap_nsnps
<-
sapply
(
regmap_setsGF
,
length
)
saveRDS
(
regmap_setsGF
,
"regmap_setsGF.rds"
)
saveRDS
(
regmap_rsetsGF
,
"regmap_rsetsGF.rds"
)
saveRDS
(
regmap_nsets
,
"regmap_nsets.rds"
)
saveRDS
(
regmap_nsnps
,
"regmap_nsnps.rds"
)
#Make Genomic Relationship Matrix for each feature and its remaining markers.
regmap_GF
<-
lapply
(
regmap_setsGF
,
function
(
x
)
{
grm
(
W
=
regmap_W
[,
x
])})
saveRDS
(
regmap_GF
,
"regmap_GF.rds"
)
regmap_rGF
<-
lapply
(
regmap_rsetsGF
,
function
(
x
)
{
grm
(
W
=
regmap_W
[,
x
])})
saveRDS
(
regmap_rGF
,
"regmap_rGF.rds"
)
########################################################################################
This diff is collapsed.
Click to expand it.
code/make_go_features_all.R
0 → 100644
+
135
−
0
View file @
42e3804c
message
(
"start of get_go_features.R..."
)
############################# FUNCTIONS ##############################################
trim
<-
function
(
x
)
gsub
(
"^\\s+|\\s+$"
,
""
,
x
)
`%not_in%`
<-
purrr
::
negate
(
`%in%`
)
# Function to check whether package is installed
is.installed
<-
function
(
mypkg
)
{
is.element
(
mypkg
,
installed.packages
()[,
1
])
}
########################################################################################
setwd
(
"../data/priors/go/"
)
#if (!is.installed("backports"))
#{
# install.packages("backports")
#}
#if (!is.installed("devtools"))
#{
# install.packages("devtools")
#}
#library("devtools")
#if (!is.installed("qgg"))
#{
# options(devtools.install.args=" --no-multiargs")
# devtools::install_github("psoerensen/qgg")
#}
library
(
'qgg'
)
library
(
'dplyr'
)
library
(
GO.db
)
library
(
org.At.tair.db
)
########################################################################################
MAC_matrix_with_header
=
readRDS
(
file
=
"../../MAC_matrix_with_header.rds"
)
accessions
=
readRDS
(
file
=
"../../accessions.rds"
)
all_nucl_genes_bed
=
readRDS
(
file
=
"../../all_nucl_genes_bed.rds"
)
ath_all_new_maf_ldpruned_map
=
readRDS
(
file
=
"../../ath_all_new_maf_ldpruned_map.rds"
)
Pheno
=
readRDS
(
file
=
"../../Pheno.rds"
)
geno_pheno
=
readRDS
(
file
=
"../../geno_pheno.rds"
)
MAC_df
=
readRDS
(
file
=
"../../MAC_df.rds"
)
pheno_df
=
readRDS
(
file
=
"../../pheno_df.rds"
)
W
=
readRDS
(
file
=
"../../W.rds"
)
go_all_genes_number
=
readRDS
(
file
=
"go_all_genes_number.rds"
)
#7432
go_all_markers_number
=
readRDS
(
file
=
"go_all_markers_number.rds"
)
#7432
go_all_markers
=
readRDS
(
file
=
"go_all_markers.rds"
)
#7432
markerSets
=
readRDS
(
file
=
"markerSets.rds"
)
#7297
setsGF
=
readRDS
(
file
=
"setsGF.rds"
)
#7297
rsetsGF
=
readRDS
(
file
=
"rsetsGF.rds"
)
#7297
nsets
=
readRDS
(
file
=
"nsets.rds"
)
#7297
nsnps
=
readRDS
(
file
=
"nsnps.rds"
)
#7297
GF
=
readRDS
(
file
=
"GF.rds"
)
#7297
rGF
=
readRDS
(
file
=
"rGF.rds"
)
#7297
###########################################################################################
message
(
"Retrieving markers for each GO"
)
if
(
!
exists
(
"go_all_markers"
))
{
go2alltair
<-
as.list
(
org.At.tairGO2ALLTAIRS
)
go_all_markers
<-
rep
(
list
(
list
()),
length
(
go2alltair
))
names
(
go_all_markers
)
<-
(
names
(
go2alltair
))
go_all_genes_number
<-
numeric
(
length
=
length
(
go_all_markers
))
if
(
length
(
go2alltair
)
>
0
)
{
for
(
i
in
c
(
1
:
length
(
go2alltair
)))
#Loop through all Go terms
{
message
(
paste
(
"GO#"
,
i
))
goid
=
names
(
go2alltair
[
i
])
tair_ids
=
go2alltair
[
i
]
lst_of_genes
=
list
()
for
(
j
in
1
:
length
(
tair_ids
[[
1
]]))
#Loop through all Tair IDs in a GO term
{
lst_of_genes
=
c
(
lst_of_genes
,
unlist
(
unname
(
tair_ids
[[
1
]][
j
])))
}
unique_lst_of_tair_ids
<-
as.vector
(
unique
(
unlist
(
lst_of_genes
)))
go_all_genes_number
[
i
]
<-
length
(
unique_lst_of_tair_ids
)
mlst
<-
list
()
for
(
j
in
1
:
length
(
unique_lst_of_tair_ids
))
{
#for each gene find its coordinates and then find all markers in it. then append these markers to corresponding GO feature
chromosome
=
as.numeric
(
all_nucl_genes_bed
[
all_nucl_genes_bed
[
"name"
]
==
unique_lst_of_tair_ids
[
j
]][
1
])
startpos
=
as.numeric
(
all_nucl_genes_bed
[
all_nucl_genes_bed
[
"name"
]
==
unique_lst_of_tair_ids
[
j
]][
2
])
endpos
=
as.numeric
(
all_nucl_genes_bed
[
all_nucl_genes_bed
[
"name"
]
==
unique_lst_of_tair_ids
[
j
]][
3
])
markers
=
ath_all_new_maf_ldpruned_map
%>%
filter
(
chr
==
chromosome
&
position
>=
startpos
&
position
<=
endpos
)
%>%
mutate_if
(
is.factor
,
as.character
)
#print(markers)
mlst
<-
c
(
mlst
,
as.list
(
markers
[
"marker_name"
]))
#print (mlst)
}
go_all_markers
[
i
]
<-
list
(
unique
(
unlist
(
unname
(
mlst
))))
}
go_all_markers_number
<-
numeric
(
length
=
length
(
go_all_markers
))
for
(
i
in
c
(
1
:
length
(
go_all_markers
)))
{
go_all_markers_number
[
i
]
=
length
(
go_all_markers
[[
i
]])
}
}
saveRDS
(
go_all_genes_number
,
"go_all_genes_number.rds"
)
saveRDS
(
go_all_markers_number
,
"go_all_markers_number.rds"
)
saveRDS
(
go_all_markers
,
"go_all_markers.rds"
)
}
###########################################################################################
#Make Genomic Relationship Matrix for each feature and its remaining markers
message
(
"Making Genomic Relationship Matrices..."
)
if
(
!
exists
(
"rGF"
))
{
markerSets
<-
list
()
for
(
i
in
c
(
1
:
length
(
go_all_markers
)))
{
if
(
length
(
go_all_markers
[[
i
]])
>
0
)
{
markerSets
=
c
(
markerSets
,
go_all_markers
[
i
])
}
}
setsGF
<-
lapply
(
markerSets
,
function
(
x
){
x
[
x
%in%
colnames
(
W
)]
})
rsetsGF
<-
lapply
(
markerSets
,
function
(
x
){
colnames
(
W
)[
colnames
(
W
)
%not_in%
x
]
})
#remaining markers for each feature GO term
nsets
<-
length
(
setsGF
)
nsnps
<-
sapply
(
setsGF
,
length
)
saveRDS
(
markerSets
,
"markerSets.rds"
)
saveRDS
(
setsGF
,
"setsGF.rds"
)
saveRDS
(
rsetsGF
,
"rsetsGF.rds"
)
saveRDS
(
nsets
,
"nsets.rds"
)
saveRDS
(
nsnps
,
"nsnps.rds"
)
#Make Genomic Relationship Matrix for each feature and its remaining markers.
GF
<-
lapply
(
setsGF
,
function
(
x
)
{
grm
(
W
=
W
[,
x
])})
saveRDS
(
GF
,
"GF.rds"
)
rGF
<-
lapply
(
rsetsGF
,
function
(
x
)
{
grm
(
W
=
W
[,
x
])})
saveRDS
(
rGF
,
"rGF.rds"
)
}
########################################################################################
message
(
"end of get_go_features.R..."
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment