diff --git a/bin/fastaCut.pl b/bin/fastaCut.pl index e15348f7f09c75a4e79d76c801806e8c67838050..af1fe5cc617cea95228ef518fd5a51f44176ace8 100755 --- a/bin/fastaCut.pl +++ b/bin/fastaCut.pl @@ -11,7 +11,7 @@ my $finish = $ARGV[3]; my $name = $ARGV[4]; -my $verbose = 1; +my $verbose = 0; die "NO INPUT FASTA" if ! defined $fasta; die "INPUT FASTA $fasta DOES NOT EXISTS" if ! -f $fasta; die "INPUT FASTA $fasta IS EMPTY" if ! -s $fasta; @@ -21,7 +21,15 @@ die "NO FINISH DEFINED" if ! defined $finish; die "NO NAME DEFINED" if ! defined $name; die "START $start NOT NUMERIC" if $start !~ /^\d+$/; -die "END $finish NOT NUMERIC" if $finish !~ /^\d+$/; +die "END $finish NOT NUMERIC" if $finish !~ /^\d+$/; + +print STDERR " + INFASTA '$fasta' + CHR '$chrom' + START '$start' + FINISH '$finish' + NAME '$name' +"; my $gene = &readFasta($fasta, $chrom, $start, $finish, $name); #&returner($gene, $start, $end, $name); @@ -40,22 +48,6 @@ my $gene = &readFasta($fasta, $chrom, $start, $finish, $name); # ./cut.pl WM276GBFF10_06.fasta CP000298_CGB_M1520C_Translation_elongation_factor_EF1_alpha__putative_323075_324730 673 1371 tef1 > tef1.276.txt -sub returner -{ - my $gene = $_[0]; - my $start = $_[1]; - my $end = $_[2]; - my $name = $_[3]; - - #$start = $start - 500; - #$end = $end + 500; - - print ">$fasta\_$chrom\_$start\_$end\_$name\n"; - - print join("", @$gene[($start-1) .. $end]); -} - - sub readFasta { @@ -88,7 +80,7 @@ sub readFasta while (<FILE>) { chomp; - if (( /^>/) && ($on)) + if (( /^\>/) && ($on)) { $on = 0; } @@ -105,27 +97,27 @@ sub readFasta my $cut2 = -1; if ( ! $seq && $seqBeg <= $begin && $seqEnd >= $begin) { $cut1 = $begin - $seqBeg; - printf "subtracting CUT1 %3d - %3d = %d\n", $seqBeg, $begin, $cut1 if $verbose; + printf STDERR "subtracting CUT1 %3d - %3d = %d\n", $seqBeg, $begin, $cut1 if $verbose; } if ( $end >= $seqBeg && $seqEnd >= $end ) { $cut2 = $end - $seqBeg + 1; - printf "subtracting CUT2 %3d - %3d = %d\n", $end, $seqBeg, $cut2 if $verbose; + printf STDERR "subtracting CUT2 %3d - %3d = %d\n", $end, $seqBeg, $cut2 if $verbose; } - printf "LINE %80s LENGTH %2d POS %5d END POS %5d CUT1 %3d CUT2 %3d ", $_, $seqLen, $seqBeg, $seqEnd, $cut1, $cut2 if $verbose; + printf STDERR "LINE %80s LENGTH %2d POS %5d END POS %5d CUT1 %3d CUT2 %3d ", $_, $seqLen, $seqBeg, $seqEnd, $cut1, $cut2 if $verbose; if ( $cut1 != -1 && $cut2 == -1 ) { if ($verbose) { printf "T1 %3d - %3d ", $cut1, $seqEnd } ; $_ = substr( $_, $cut1 ) } # get from end elsif ( $cut1 == -1 && $cut2 != -1 ) { if ($verbose) { printf "T2 0 - %3d ", $cut2 } ; $_ = substr( $_, 0 , $cut2 ) } # get from begining elsif ( $cut1 != -1 && $cut2 != -1 ) { if ($verbose) { printf "T3 %3d - %3d ", $cut1, ( $cut2 - $cut1 ) } ; $_ = substr( $_, $cut1, ($cut2 - $cut1) ) } # indside - else { if ($verbose) { print "T4 ALL " } ; } # all + else { if ($verbose) { print "T4 ALL " } ; } # all - if ( $seqBeg < $begin && $seqEnd < $begin ) { if ($verbose) { print " NEXTING ($seqBeg < $seqEnd < $begin)\n"; } next }; + if ( $seqBeg < $begin && $seqEnd < $begin ) { if ($verbose) { print STDERR " NEXTING ($seqBeg < $seqEnd < $begin)\n"; } next }; if ( ! defined $_ ) { die "NO LINE\n"; } $seq .= $_; - print "$_\n" if ($verbose); + print STDERR "$_\n" if $verbose; last if $seqEnd >= $end; #push(@gene, split("",$_)); @@ -137,7 +129,7 @@ sub readFasta #} } - if (/^>$chrom/) + if (/^\>$chrom/) { #print ">$chrom\n"; $on = 1; @@ -145,18 +137,18 @@ sub readFasta } print "$begin $seq $end (" , length($seq), "/",($end - $begin + 1),")\n" if ($verbose); if ( length($seq) != ($end - $begin + 1) ) { - print "COULD NOT EXTRACT FULL SEQUENCE LENGTH\n"; - print " REQUESTED SIZE ", ($end - $begin + 1) , "\n"; - print " OBSERVED SIZE ", length($seq) , "\n"; - print " TOTAL SEQ LENGTH ", $totalSeqLen , "\n"; - print " FOUND CHROMOSOME ", ($on ? 'yes' : 'no') , "\n"; + print STDERR "COULD NOT EXTRACT FULL SEQUENCE LENGTH\n"; + print STDERR " REQUESTED SIZE ", ($end - $begin + 1) , "\n"; + print STDERR " OBSERVED SIZE ", length($seq) , "\n"; + print STDERR " TOTAL SEQ LENGTH ", $totalSeqLen , "\n"; + print STDERR " FOUND CHROMOSOME ", ($on ? 'yes' : 'no') , "\n"; exit 2; } if ( $seq ) { $seq =~ s/(.{60})/$1\n/g; - print "$seq\n"; + print $seq, "\n"; } else { die "NOT ABLE TO EXTRACT SEQUENCE\n"; } @@ -164,5 +156,5 @@ sub readFasta undef $chromo; close FILE; # print "GOT GENE\n"; - return \@gene; + #return \@gene; }