From a8b2516797bdc491a836018af88e29fd43eb40b7 Mon Sep 17 00:00:00 2001
From: "Stege, Paul" <paul.stege@wur.nl>
Date: Thu, 3 Nov 2022 17:03:51 +0000
Subject: [PATCH] Upload New File

---
 09_mimic_s2_hmmscan_parsed.pl | 88 +++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 09_mimic_s2_hmmscan_parsed.pl

diff --git a/09_mimic_s2_hmmscan_parsed.pl b/09_mimic_s2_hmmscan_parsed.pl
new file mode 100644
index 0000000..5b0d2b3
--- /dev/null
+++ b/09_mimic_s2_hmmscan_parsed.pl
@@ -0,0 +1,88 @@
+#=============================================================================================
+#author			Neeraj Kumar et al., 2021; https://github.com/ClavelLab/MiMiC
+#date			02-2021
+#adjusted by		Paul Stege
+#script			parses the hmmscan output for step 3
+#software		perl
+#=============================================================================================
+# usage
+# perl 09_mimic_s2_hmmscan_parsed.pl PathWorkDir optionforProdigal
+# PathWorkDir should contain the folder /08_mimic_output/output_pfam_<optionforProdigal> (read by this script)
+# optionforProdigal, either meta or single, states if working with metagenome or single genomes
+#=============================================================================================
+#!/home/user/miniconda/envs/env_rational/bin/perl 
+use strict;
+use warnings;
+use File::Copy;
+
+my ($INDIR, $TYPE) = @ARGV; 	# PathWorkDir, then optionforProdigal (directs to correct output folder).
+defined $TYPE or die "usage: perl $0 PathWorkDir optionforProdigal\n"; 	# both arguments are required.
+
+# Create the output folders; existing ones are reused so re-runs do not die.
+sub main {
+    my $maindir   = "${INDIR}/08_mimic_output";
+    my $directory = "${maindir}/hmmscan_parsed_${TYPE}";
+    print "$directory\n";
+    foreach my $dir ($maindir, $directory) {
+        -d $dir or mkdir $dir or die "Unable to create $dir: $!\n";
+    }
+}
+
+
+main();
+
+# Main script body: for every file in the source directory, write the accessions
+# of all rows whose E-value is below the cutoff to a same-named file in $dest.
+my $srcdir = "${INDIR}/08_mimic_output/output_pfam_${TYPE}"; #source directory
+my $dest = "${INDIR}/08_mimic_output/hmmscan_parsed_${TYPE}/"; #target directory
+
+# Collect the plain files to parse ('.', '..' and sub-directories are skipped).
+opendir(my $dh, $srcdir) or die "can not open $srcdir,$!";
+my @files = grep { -f "$srcdir/$_" } readdir($dh);
+closedir($dh);
+
+# Rows are whitespace separated (presumably hmmscan --tblout; confirm against the
+# step that produces them). Only two columns are used, as 0-based indexes:
+my $ACCESSION_COL = 1;    # 2nd column, the accession written to the output
+my $EVALUE_COL    = 4;    # 5th column, the E-value that is compared to the cutoff
+my $EVALUE_CUTOFF = 0.01; # rows with an E-value below this are kept
+
+# save log
+my $logfile = "${INDIR}/09_mimic_s2_hmmscan_parsed_${TYPE}.pl.log";
+open(my $log_new, '>', $logfile) or die "could not open log '$logfile' $!";
+print $log_new "input directory is: $srcdir \n";
+print $log_new "output directory is: $dest \n";
+close($log_new) or die "could not close log '$logfile' $!";
+
+# run samples
+foreach my $file (@files)
+	{
+		my $old = "$srcdir/$file";
+		my $new = "$dest$file"; # $dest already ends with '/'
+		open(my $fh,'<:encoding(UTF-8)',$old) or die "could not open file '$old'$! ";
+		open(my $fh_t, '>', $new) or die "Could not open file '$new' $!";
+		while (my $line = <$fh>)
+		{
+			# Comment rows start with '#'; a '#' further along the row must not drop it.
+			next if $line =~ /^\s*#/;
+			# Awk-style split: ignores leading whitespace and the trailing newline.
+			my @fields = split(' ', $line);
+			# Blank or truncated rows carry no E-value to test.
+			next if @fields <= $EVALUE_COL;
+			my ($accession_FS, $E_value_FS) = @fields[$ACCESSION_COL, $EVALUE_COL];
+			if ($E_value_FS < $EVALUE_CUTOFF)
+			{
+				print $fh_t "$accession_FS\n";
+				print "$accession_FS\t$E_value_FS\n";
+			}
+		}
+		close($fh);
+		# Buffered write errors (e.g. disk full) only surface on close.
+		close($fh_t) or die "Could not close file '$new' $!";
+#		move($old, $dest) or die "Move $old -> $dest failed: $!"; #to move files from one folder to another.
+
+		# Append one line per finished sample to the log.
+		open(my $log_add, '>>', $logfile) or die "could not open log '$logfile' $!";
+		print $log_add "completed sample: $file \n";
+		close($log_add) or die "could not close log '$logfile' $!";
+	}
\ No newline at end of file
-- 
GitLab