Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
I
ibrowser
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Aflitos, Saulo Alves
ibrowser
Commits
398ab7eb
Commit
398ab7eb
authored
10 years ago
by
Aflitos, Saulo Alves
Browse files
Options
Downloads
Patches
Plain Diff
converter for multicolumn vcf. again
parent
e7aeb5bd
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
vcfmerger/gen_makefile.py
+20
-20
20 additions, 20 deletions
vcfmerger/gen_makefile.py
vcfmerger/gen_makefile.py.examples
+16
-16
16 additions, 16 deletions
vcfmerger/gen_makefile.py.examples
vcfmerger/split_multicolumn_vcf.py
+96
-0
96 additions, 0 deletions
vcfmerger/split_multicolumn_vcf.py
with
132 additions
and
36 deletions
vcfmerger/gen_makefile.py
+
20
−
20
View file @
398ab7eb
...
...
@@ -10,62 +10,62 @@ timestamp = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
#/home/assembly/tomato150/programs/vcfmerger_ui/data/src/ara/indata
#./vcfmerger/
aux/
gen_makefile.py --input arabidopsis.csv --infasta TAIR10.fasta --size 50000 --project arabidopsis_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#./vcfmerger/gen_makefile.py --input arabidopsis.csv --infasta TAIR10.fasta --size 50000 --project arabidopsis_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#make -f makefile_arabidopsis_50k
#
#./vcfmerger/
aux/
gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#make -f makefile_arabidopsis_xianwen_50k
#
#./vcfmerger/
aux/
gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
#./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
#make -f makefile_arabidopsis_xianwen_50k_sing
#
#./vcfmerger/
aux/
gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#make -f makefile_arabidopsis_xianwen_10k
#
#./vcfmerger/
aux/
gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
#./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
#make -f makefile_arabidopsis_xianwen_10k_sing
#
#
#
#/home/assembly/tomato150/programs/vcfmerger_ui/data/src/tom85
#./vcfmerger/
aux/
gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 10000 --project tom84_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 10000 --project tom84_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_10k
#
#./vcfmerger/
aux/
gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 50000 --project tom84_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 50000 --project tom84_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_50k
#
#./vcfmerger/
aux/
gen_makefile.py --input short2.lst --filter-gff ITAG2.3_gene_models.gff3.gene.gff3 --project tom84_genes --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --filter-gff ITAG2.3_gene_models.gff3.gene.gff3 --project tom84_genes --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_genes
#
#./vcfmerger/
aux/
gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000_introgression.gff --project tom84_10k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000_introgression.gff --project tom84_10k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_10k_introgression
#
#./vcfmerger/
aux/
gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000_introgression.gff --project tom84_50k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000_introgression.gff --project tom84_50k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_50k_introgression
#
#
#
#/home/assembly/tomato150/programs/vcfmerger_ui/data/src/RIL
#./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_RIL_50k
#
#./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --cluster-no-cols
#make -f makefile_RIL_50k_mode_ril
#
#./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --cluster-no-cols
#make -f makefile_RIL_50k_mode_ril_greedy
#
#./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-delete --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-delete --cluster-no-cols
#make -f makefile_RIL_50k_mode_ril_delete
#
#./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --concat-RIL-delete --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --concat-RIL-delete --cluster-no-cols
#make -f makefile_RIL_50k_mode_ril_delete_greedy
#
#./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000.gff --project RIL_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000.gff --project RIL_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_RIL_10k
SCRIPT_DIR
=
'
vcfmerger
'
AUX_DIR
=
os
.
path
.
join
(
SCRIPT_DIR
,
'
aux
'
)
AUX_DIR
=
os
.
path
.
join
(
SCRIPT_DIR
)
merger
=
os
.
path
.
abspath
(
os
.
path
.
join
(
SCRIPT_DIR
,
'
vcfmerger.py
'
)
)
...
...
@@ -76,8 +76,8 @@ walk_ram = os.path.abspath( os.path.join( SCRIPT_DIR, 'vcf_walk_ram.py' ) )
walk_sql
=
os
.
path
.
abspath
(
os
.
path
.
join
(
SCRIPT_DIR
,
'
vcf_walk_sql.py
'
)
)
cluster
=
os
.
path
.
abspath
(
os
.
path
.
join
(
SCRIPT_DIR
,
'
cluster.py
'
)
)
topng
=
os
.
path
.
abspath
(
os
.
path
.
join
(
SCRIPT_DIR
,
'
newick_to_png.py
'
)
)
fasta_spacer
=
os
.
path
.
abspath
(
os
.
path
.
join
(
AUX
_DIR
,
'
fasta_spacer.py
'
)
)
tree_maker
=
os
.
path
.
abspath
(
os
.
path
.
join
(
AUX
_DIR
,
'
FastTreeMP
'
)
)
fasta_spacer
=
os
.
path
.
abspath
(
os
.
path
.
join
(
SCRIPT
_DIR
,
'
fasta_spacer.py
'
)
)
tree_maker
=
os
.
path
.
abspath
(
os
.
path
.
join
(
SCRIPT
_DIR
,
'
FastTreeMP
'
)
)
class
makewriter
(
object
):
...
...
@@ -372,7 +372,7 @@ def main(args):
if
infasta
:
#vcfmerger/
aux/
fasta_spacer.py GENOME.fa 50000
#vcfmerger/fasta_spacer.py GENOME.fa 50000
gff_cmd
=
"
%s %s %s
"
%
(
fasta_spacer
,
infasta
,
size
)
writer
.
write
(
infasta
,
filter_gff
,
gff_cmd
,
nick
=
'
gff
'
)
...
...
This diff is collapsed.
Click to expand it.
vcfmerger/gen_makefile.py.examples
+
16
−
16
View file @
398ab7eb
/home/assembly/tomato150/programs/vcfmerger_ui/data/src/ara/indata
./vcfmerger/
aux/
gen_makefile.py --input arabidopsis.csv --infasta TAIR10.fasta --size 50000 --project arabidopsis_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
./vcfmerger/gen_makefile.py --input arabidopsis.csv --infasta TAIR10.fasta --size 50000 --project arabidopsis_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
make -f makefile_arabidopsis_50k
./vcfmerger/
aux/
gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
make -f makefile_arabidopsis_xianwen_50k
./vcfmerger/
aux/
gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
make -f makefile_arabidopsis_xianwen_50k_sing
./vcfmerger/
aux/
gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
make -f makefile_arabidopsis_xianwen_10k
./vcfmerger/
aux/
gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
make -f makefile_arabidopsis_xianwen_10k_sing
/home/assembly/tomato150/programs/vcfmerger_ui/data/src/tom85
./vcfmerger/
aux/
gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 10000 --project tom84_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 10000 --project tom84_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_10k
./vcfmerger/
aux/
gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 50000 --project tom84_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 50000 --project tom84_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_50k
./vcfmerger/
aux/
gen_makefile.py --input short2.lst --filter-gff ITAG2.3_gene_models.gff3.gene.gff3 --project tom84_genes --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --filter-gff ITAG2.3_gene_models.gff3.gene.gff3 --project tom84_genes --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_genes
./vcfmerger/
aux/
gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000_introgression.gff --project tom84_10k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000_introgression.gff --project tom84_10k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_10k_introgression
./vcfmerger/
aux/
gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000_introgression.gff --project tom84_50k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000_introgression.gff --project tom84_50k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_50k_introgression
/home/assembly/tomato150/programs/vcfmerger_ui/data/src/RIL
./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_RIL_50k
./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --cluster-no-cols
make -f makefile_RIL_50k_mode_ril
./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --cluster-no-cols
make -f makefile_RIL_50k_mode_ril_greedy
./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-delete --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-delete --cluster-no-cols
make -f makefile_RIL_50k_mode_ril_delete
./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --concat-RIL-delete --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --concat-RIL-delete --cluster-no-cols
make -f makefile_RIL_50k_mode_ril_delete_greedy
./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000.gff --project RIL_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000.gff --project RIL_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_RIL_10k
...
...
This diff is collapsed.
Click to expand it.
vcfmerger/split_multicolumn_vcf.py
0 → 100755
+
96
−
0
View file @
398ab7eb
#!/usr/bin/python
import
os
import
sys
import
string
# Genotype prefixes whose records are not exported to the per-sample files:
# '0/0' is a reference call and './.' is a no-coverage call.
ignores = [
    '0/0',
    './.',
]

# Characters that sanitize() keeps unchanged: underscore, ASCII letters, digits.
_valid_text = "_%s%s" % (string.ascii_letters, string.digits)
valid_chars = frozenset(_valid_text)
def sanitize(name):
    """Return *name* with every character outside ``valid_chars`` replaced by '_'.

    The result has the same length as the input; characters that are members
    of ``valid_chars`` are copied unchanged.
    """
    cleaned = []
    for char in name:
        if char not in valid_chars:
            char = '_'
        cleaned.append(char)
    return ''.join(cleaned)
def main():
    """Split a multi-sample VCF into one single-sample VCF per sample column.

    The input path is read from ``sys.argv[1]``.  For every sample column
    (column 10 onwards of the ``#`` column-description line) a file named
    ``<infile>_<NN>_<sanitized sample name>.vcf`` is created; it receives the
    ``##`` definition lines seen so far, a ``##Split from`` provenance line
    and a column header holding the nine shared columns plus that sample.
    A companion ``<infile>.lst`` file gets one tab-separated line per sample
    (``1``, absolute path of the new VCF, sample name).

    Data lines are copied to a sample's file unless the sample's field starts
    with one of the prefixes in ``ignores``.  Per-sample skipped / exported
    counters are printed at the end.

    Exits with status 1 when no input is given, the path does not exist or
    is a directory.

    Raises:
        ValueError: if a data line appears before the column-description
            line, or its number of tab-separated columns differs from the
            column-description line.
    """
    try:
        infile = sys.argv[1]
    except IndexError:
        print("no input file given")
        print("%s <INPUT MULTICOLUMN CSV>" % sys.argv[0])
        sys.exit(1)

    if not os.path.exists(infile):
        print("input file %s does not exists" % infile)
        sys.exit(1)

    if os.path.isdir(infile):
        print("input file %s is a folder" % infile)
        sys.exit(1)

    print("splitting %s" % infile)

    defs = []        # "##" definition lines collected so far
    outfiles = []    # per sample: [name, file name, handle, skipped, valid]
    handles = []     # every handle opened for writing, closed in ``finally``
    num_cols = None  # column count of the "#" column-description line
    skip_prefixes = tuple(ignores)

    try:
        with open(infile) as fhd:
            for line_no, line in enumerate(fhd, 1):
                line = line.strip()

                if not line:
                    continue

                if line.startswith("##"):
                    # definition line
                    defs.append(line)
                    continue

                if line.startswith("#"):
                    # column description
                    cols = line.split("\t")
                    num_cols = len(cols)
                    shared = cols[:9]  # CHROM POS ID REF ALT QUAL FILTER INFO FORMAT
                    names = cols[9:]
                    outfiles = [None] * len(names)

                    outlist = open("%s.lst" % infile, 'w')
                    handles.append(outlist)

                    # Field widths depend only on the header, so build the
                    # format strings once instead of once per sample.
                    idx_width = str(len("%d" % len(names)))
                    name_width = str(max([len(x) for x in names] or [0]))
                    file_fmt = "%s_%0" + idx_width + "d_%s.vcf"
                    log_fmt = "creating %" + idx_width + "d %-" + name_width + "s to %s"
                    header_prefix = "\t".join(shared)
                    source_path = os.path.abspath(infile)

                    for idx, name in enumerate(names):
                        nof = file_fmt % (infile, idx + 1, sanitize(name))
                        print(log_fmt % (idx + 1, name, nof))

                        nop = open(nof, 'w')
                        handles.append(nop)

                        #                               skipped valid
                        outfiles[idx] = [name, nof, nop, 0, 0]

                        outlist.write("1\t%s\t%s\n" % (os.path.abspath(nof), name))

                        nop.write("\n".join(defs) + "\n")
                        nop.write("##Split from: %s column %d\n" % (source_path, idx + 1))
                        nop.write(header_prefix)
                        nop.write("\t%s\n" % name)
                        nop.flush()

                    continue

                # data line
                cols = line.split("\t")

                if num_cols is None:
                    raise ValueError(
                        "%s line %d: data line found before the column header line"
                        % (infile, line_no))

                if len(cols) != num_cols:
                    raise ValueError(
                        "%s line %d: expected %d columns, found %d"
                        % (infile, line_no, num_cols, len(cols)))

                shared_text = "\t".join(cols[:9])  # CHROM POS ID REF ALT QUAL FILTER INFO FORMAT
                data = cols[9:]

                for pos, ndata in enumerate(data):
                    entry = outfiles[pos]

                    if ndata.startswith(skip_prefixes):
                        entry[3] += 1  # skipped
                        continue

                    entry[4] += 1  # valid
                    entry[2].write(shared_text + "\t%s\n" % ndata)
    finally:
        # Close the list file and every per-sample file, also on errors.
        for handle in handles:
            handle.close()

    if outfiles:
        report_fmt = (
            "closing %" + str(len("%d" % len(outfiles))) +
            "d %-" + str(max([len(x[0]) for x in outfiles])) +
            "s :: %-" + str(max([len(x[1]) for x in outfiles])) +
            "s :: skipped %6d exported %6d total %7d"
        )

        for idx, entry in enumerate(outfiles):
            print(report_fmt % (idx + 1, entry[0], entry[1],
                                entry[3], entry[4], entry[3] + entry[4]))
# Run the splitter only when this file is executed as a script.
if __name__ == "__main__":
    main()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment