Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
SAM harmonization
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Brankovics, Balazs
SAM harmonization
Compare revisions
da5da828c8b3f8eb7f9f7ea062539df36617f891 to 0c10b8d5aa41271803c9ff8d97421c7a3ce4680f
Compare revisions
Changes are shown as if the
source
revision was being merged into the
target
revision.
Learn more about comparing revisions.
Source
brank001/sam-harmonization
Select target project
No results found
0c10b8d5aa41271803c9ff8d97421c7a3ce4680f
Select Git revision
Swap
Target
brank001/sam-harmonization
Select target project
brank001/sam-harmonization
1 result
da5da828c8b3f8eb7f9f7ea062539df36617f891
Select Git revision
Show changes
Only incoming changes from source
Include changes to target since source was created
Compare
Commits on Source (2)
filter based on RNAME and managing paired reads
· dd516ffb
Brankovics, Balazs
authored
2 years ago
dd516ffb
Merge branch 'dev' into main
· 0c10b8d5
Brankovics, Balazs
authored
2 years ago
0c10b8d5
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/sam-keep-best.pl
+34
-6
34 additions, 6 deletions
src/sam-keep-best.pl
src/sam-ref-grep.pl
+91
-0
91 additions, 0 deletions
src/sam-ref-grep.pl
with
125 additions
and
6 deletions
src/sam-keep-best.pl
View file @
0c10b8d5
...
...
@@ -10,7 +10,7 @@ use File::Basename;
use
Data::
Dumper
;
my
$programname
=
"
sam-keep-best.pl
";
my
$version
=
"
1.
1
";
my
$version
=
"
1.
2
";
my
$cmd
=
join
("
",
$programname
,
@ARGV
);
#===DESCRIPTION=================================================================
...
...
@@ -165,6 +165,17 @@ while(<>) {
}
next
if
$hit
{"
FLAG
"}
&
4
||
$hit
{'
RNAME
'}
eq
"
*
";
# Add read pair flags ('/1' and '/2' for forward and reverse reads of a pair)
# otherwise false conclusions will be drawn
# these flags will be removed before printing to OUTPUT
if
(
$hit
{"
FLAG
"}
&
64
)
{
$hit
{'
QNAME
'}
.=
"
/1
";
}
elsif
(
$hit
{"
FLAG
"}
&
128
)
{
$hit
{'
QNAME
'}
.=
"
/2
";
}
# Get alignment data
my
$aln
=
biointsam::
parse_cigar
(
$hit
{'
CIGAR
'},
$hit
{'
FLAG
'},
$hit
{'
SEQ
'});
# Store the length of the query seq
...
...
@@ -338,9 +349,11 @@ if ($mode eq "local") {
# Print equals and empty @equals
while
(
@equals
)
{
my
$x
=
shift
@equals
;
print
biointsam::
sam_string
(
$x
->
{'
hit
'}),
"
\n
";
# print biointsam::sam_string($x->{'hit'}), "\n";
&print_sam
(
$x
->
{'
hit
'});
}
print
biointsam::
sam_string
(
$a
->
{'
hit
'}),
"
\n
";
# print biointsam::sam_string($a->{'hit'}), "\n";
&print_sam
(
$a
->
{'
hit
'});
$a
=
$b
;
# Update tolerance if percent option is selected
$e
=
(
$a
->
{'
to
'}
-
$a
->
{'
from
'}
+
1
)
*
$percentoverlap
if
$percentoverlap
;
...
...
@@ -348,9 +361,11 @@ if ($mode eq "local") {
}
while
(
@equals
)
{
my
$x
=
shift
@equals
;
print
biointsam::
sam_string
(
$x
->
{'
hit
'}),
"
\n
";
# print biointsam::sam_string($x->{'hit'}), "\n";
&print_sam
(
$x
->
{'
hit
'});
}
print
biointsam::
sam_string
(
$a
->
{'
hit
'}),
"
\n
"
if
$a
;
# print biointsam::sam_string($a->{'hit'}), "\n" if $a;
&print_sam
(
$a
->
{'
hit
'});
}
}
elsif
(
$mode
eq
"
summative
")
{
...
...
@@ -411,7 +426,8 @@ if ($mode eq "local") {
}
for
my
$pair
(
@best
)
{
for
my
$hit
(
@
{
$pair
->
{"
hits
"}
})
{
print
biointsam::
sam_string
(
$hit
),
"
\n
";
# print biointsam::sam_string($hit), "\n";
&print_sam
(
$hit
);
}
}
}
...
...
@@ -419,6 +435,18 @@ if ($mode eq "local") {
#===SUBROUTINES=================================================================
sub print_sam {
    # Print one alignment record to STDOUT as a SAM line.
    #
    # Argument: a reference to the hit hash (reads FLAG, rewrites QNAME).
    # The '/1' or '/2' read pair suffix is stripped from QNAME in place,
    # so the caller's hash is modified as a side effect.
    my ($hash) = @_;

    my $flag          = $hash->{'FLAG'};
    my $first_in_pair = $flag & 64;

    # Remove read pair flags ('/1' and '/2' for forward and reverse reads of a pair);
    # the second-in-pair bit (128) is only consulted when the first-in-pair bit (64) is unset.
    $hash->{'QNAME'} =~ s/\/1$// if $first_in_pair;
    $hash->{'QNAME'} =~ s/\/2$// if !$first_in_pair && ($flag & 128);

    print biointsam::sam_string($hash), "\n";
}
sub
update_pair
{
# Update pair info
# This is used for summative mode
...
...
This diff is collapsed.
Click to expand it.
src/sam-ref-grep.pl
0 → 100755
View file @
0c10b8d5
#!/usr/bin/env perl
use warnings;
use strict;

use FindBin;                     # locate this script
use lib "$FindBin::RealBin/lib"; # use the lib directory
use biointsam;
use biointbasics;

my $programname = "sam-ref-grep.pl";
my $version     = "1.0";
# Command line as invoked; recorded in the CL field of the @PG header record.
my $cmd = join(" ", $programname, @ARGV);

#===DESCRIPTION=================================================================

my $description =
    "Description:\n\t" .
    "A tool to filter SAM files using grep like filter based on reference names.\n" .
    "\tThe tool either opens the file specified as input or reads from STDIN when no file is given.\n";
my $usage =
    "Usage:\n\t$0 [OPTIONS] [SAM file] ['<regex>']\n";
my $options =
    "Options:\n" .
    "\t-h | --help\n\t\tPrint the help message; ignore other arguments.\n" .
    "\t-v | --invert-match\n\t\tInvert the sense of matching, to select non-matching entries.\n" .
    "\n";
my $info = {
    description => $description,
    usage       => $usage,
    options     => $options,
};

#===MAIN========================================================================

my %ref;        # filled by biointsam::parse_sam while reading
my $invert;     # true when -v / --invert-match was given
my @requests;   # regular expressions supplied on the command line
my @keep;       # input sources ('-' or *.sam) handed back to <>

# Print help if needed
# NOTE(review): presumably print_help exits when it prints - confirm in biointbasics.
biointbasics::print_help(\@ARGV, $info);

# Sort the arguments into input files, options and filter expressions.
for (@ARGV) {
    if (/^-$/ || /\.sam$/) {
        push @keep, $_;
    }
    elsif (/^(?:-v|--invert-match)$/) {
        # Both spellings listed in the help text select inverted mode; the
        # long form must not fall through and be used as a filter regex.
        $invert = "yes";
    }
    else {
        push @requests, $_;
    }
}
@ARGV = @keep;

biointbasics::print_help(\@ARGV, $info, "ERROR: No regular expressions specified for filtering\n\n") unless @requests;

# Compile every expression once, before any input is read. A malformed
# pattern is reported immediately instead of after part of the output has
# been written, and a compiled (qr//) object never triggers Perl's
# "empty pattern means last successful match" rule.
my @patterns;
for my $request (@requests) {
    my $compiled = eval { qr/$request/ };
    die "ERROR: Invalid regular expression '$request': $@" unless defined $compiled;
    push @patterns, $compiled;
}

my $header          = "true";   # true until this program's @PG record is written
my $previousprogram = "";       # ID of the last @PG record seen in the input
my $saw_header_line;            # true once at least one header line was passed through

while (<>) {
    my %hit;
    biointsam::parse_sam($_, \%ref, \%hit);

    unless (%hit) {
        # Nothing was parsed into %hit: treated as a header line and passed through.
        # NOTE(review): the explicit "\n" assumes parse_sam strips the line
        # ending from $_ - confirm in biointsam.
        print "$_\n";
        $saw_header_line = 1;
        if (/^\@PG\tID:(\S+)/) {
            $previousprogram = $1;
        }
        next;
    }

    if ($header) {
        # First line after the header section
        print pg_record($previousprogram);
        $header = undef;
    }

    # A record is positive as soon as any one expression matches its RNAME.
    my $positive;
    for my $pattern (@patterns) {
        if ($hit{'RNAME'} =~ $pattern) {
            $positive = 1;
            last;
        }
    }

    # Print if it matched and not inverted mode
    # Print if it did not match but invert is selected
    if ($positive xor $invert) {
        print biointsam::sam_string(\%hit), "\n";
    }
}

# Input that had header lines but no alignment records never reached the
# block above; write the @PG record now so this step is still recorded.
print pg_record($previousprogram) if $header && $saw_header_line;

#===SUBROUTINES=================================================================

sub pg_record {
    # Build the @PG header line (newline terminated) describing this program.
    # $previous is the ID of the preceding program; PP is left out when it is empty.
    my ($previous) = @_;
    my $text = "\@PG\tID:$programname\tPN:$programname";
    $text .= "\tPP:$previous" if $previous;
    return $text . "\tVN:$version\tCL:$cmd\n";
}
This diff is collapsed.
Click to expand it.