#!/usr/bin/env perl
#=============================================================================================
#author     Neeraj Kumar et al., 2021; https://github.com/ClavelLab/MiMiC
#date       02-2021
#adjusted   by Paul Stege
#script     parses the hmmscan output for step 3
#software   perl
#=============================================================================================
# usage
#   perl 09_mimic_s2_hmmscan_parsed.pl PathWorkDir optionforProdigal
#
#   PathWorkDir        working directory; the script reads every file in
#                      PathWorkDir/08_mimic_output/output_pfam_<optionforProdigal>
#   optionforProdigal  either "meta" or "single" (metagenome or single genomes);
#                      it selects the input folder and names the output folder
#
# output
#   PathWorkDir/08_mimic_output/hmmscan_parsed_<optionforProdigal>/<input file name>
#       one accession (2nd column of the table) per line, for every record whose
#       full-sequence E-value (5th column) is below the cutoff
#   PathWorkDir/09_mimic_s2_hmmscan_parsed_<optionforProdigal>.pl.log
#       input/output directories and one line per completed sample
#   STDOUT
#       the output directory, then "accession<TAB>E-value" for every kept record
#=============================================================================================
# provenance
#   Recovered from git patch a8b2516797bdc491a836018af88e29fd43eb40b7
#   ("Upload New File", Paul Stege <paul.stege@wur.nl>, Thu, 3 Nov 2022 17:03:51 +0000),
#   which created 09_mimic_s2_hmmscan_parsed.pl.
#   The original interpreter line was
#   #!/home/user/miniconda/envs/env_rational/bin/perl ; activate that environment
#   (or call its perl explicitly) if a specific perl build is required.
#=============================================================================================
use strict;
use warnings;
use File::Copy;    # only needed if the optional move() in main() is re-enabled
use Scalar::Util qw(looks_like_number);

# Records with a full-sequence E-value at or above this value are discarded.
my $EVALUE_CUTOFF = 0.01;

# Both arguments are required: every path below is built from them.
@ARGV >= 2
    or die "usage: perl $0 PathWorkDir optionforProdigal\n";

my $INDIR = $ARGV[0];    # PathWorkDir.
my $TYPE  = $ARGV[1];    # optionforProdigal, directs to correct output folder.

my $maindir = "${INDIR}/08_mimic_output";
my $srcdir  = "${INDIR}/08_mimic_output/output_pfam_${TYPE}";       # source directory
my $dest    = "${INDIR}/08_mimic_output/hmmscan_parsed_${TYPE}/";   # target directory
my $logfile = "${INDIR}/09_mimic_s2_hmmscan_parsed_${TYPE}.pl.log";

# Create the output directory and announce it on STDOUT.
# Dies when the directory cannot be created; mkdir also fails when the
# directory already exists, so a second run into the same folder aborts here.
sub create_output_dir {
    my $directory = "${INDIR}/08_mimic_output/hmmscan_parsed_${TYPE}";
    print "$directory\n";

    if (!-d $maindir) {
        mkdir $maindir or die "Unable to create $maindir: $!\n";
    }
    mkdir $directory or die "Unable to create $directory: $!\n";
    return;
}

# Return the sorted names of the plain files in $dir.
# "." / ".." style entries and subdirectories are left out.
sub list_input_files {
    my ($dir) = @_;

    opendir(my $dh, $dir) or die "can not open $dir,$!";
    my @names = sort grep { !/^\.+$/ && -f "$dir/$_" } readdir($dh);
    closedir($dh);

    return @names;
}

# Write @messages to the log file. $mode is '>' (truncate) or '>>' (append).
sub write_log {
    my ($mode, @messages) = @_;

    open(my $log, $mode, $logfile)
        or die "Could not open log file '$logfile': $!";
    print {$log} @messages;
    close($log) or die "Could not close log file '$logfile': $!";
    return;
}

# Read one whitespace-separated table from $in_path and write the accession of
# every record below the E-value cutoff to $out_path, one per line.
# The same records are echoed to STDOUT as "accession<TAB>E-value".
sub parse_hmmscan_table {
    my ($in_path, $out_path) = @_;

    open(my $in, '<:encoding(UTF-8)', $in_path)
        or die "could not open file '$in_path'$! ";
    open(my $out, '>', $out_path)
        or die "Could not open file '$out_path' $!";

    LINE:
    while (my $line = <$in>) {
        # Comment lines start with '#'. A '#' further along the line (for
        # example inside the description column) does not make it a comment.
        next LINE if $line =~ /^\s*(?:#|$)/;

        # split ' ' ignores leading whitespace, so column numbers stay stable.
        # Field 1 is the accession, field 4 the full-sequence E-value.
        my ($accession, $evalue) = (split ' ', $line)[1, 4];

        if (!defined $evalue || !looks_like_number($evalue)) {
            warn "skipping malformed line $. in '$in_path'\n";
            next LINE;
        }
        next LINE if $evalue >= $EVALUE_CUTOFF;

        print {$out} "$accession\n";
        print "$accession\t$evalue\n";
    }

    close($in);
    # Buffered write errors only surface at close, so check it.
    close($out) or die "Could not close file '$out_path' $!";
    return;
}

# Entry point: prepare the output folder, then parse every input file and log
# each completed sample.
sub main {
    create_output_dir();

    my @files = list_input_files($srcdir);    # all file names in the source directory

    # save log
    write_log('>',
        "input directory is: $srcdir \n",
        "output directory is: $dest \n",
    );

    # run samples
    for my $file (@files) {
        my $old = "$srcdir/$file";
        my $new = "$dest$file";    # $dest already ends with '/'

        parse_hmmscan_table($old, $new);
        # move($old, $dest) or die "Move $old -> $dest failed: $!"; # to move files from one folder to another.

        write_log('>>', "completed sample: $file \n");
    }
    return;
}

main();