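Minor fix: silence debug prints and warnings, fix --minsize help-text spacing, update get_euk_prok_fasta.sh path, ignore egg-info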

822be4c4 · lotte.pronk · 923c1c98 · 822be4c4 · 822be4c4 · 822be4c4
Commit 822be4c4 authored 2 years ago by lotte.pronk
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,5 @@
 /whokaryote_scripts/test_prediction.py
 /whokaryote_scripts/train_classifier.py
 /whokaryote_scripts/train_kmers_only.py
-/whokaryote/scripts/test_kmer_predictions.py
\ No newline at end of file
+/whokaryote/scripts/test_kmer_predictions.py
+/Whokaryote.egg-info/
--- a/bin/whokaryote.py
+++ b/bin/whokaryote.py
@@ -15,7 +15,7 @@ parser.add_argument("--f", action='store_true', help="If you want new multifasta
                                                     "prokaryotes. This can take a long time.")
 parser.add_argument("--test", action='store_true', help="If you want to test it on a known dataset.")
 parser.add_argument("--train", help="For training an RF on your own dataset. Provide name of RF output file.")
-parser.add_argument("--minsize", default=5000, help="Select a minimum contig size in bp, default = 5000. Accuracy on\
+parser.add_argument("--minsize", default=5000, help="Select a minimum contig size in bp, default = 5000. Accuracy on \
 contigs below 5000 is lower.")
 #  parser.add_argument("--log", action='store_true', help="If you want a log file.")
 parser.add_argument("--model", default="T", help="Choose the stand-alone model or the tiara-integrated model: S or T.\
@@ -107,7 +107,7 @@ if args.train:

 if args.f:
    print("Writing eukaryotic and prokaryotic contigs to separate fasta files. This can take very long...")
-    script_path = os.path.join(str(Path(__file__).parents[1]), "whokaryote_scripts", "get_euk_prok_fasta.sh")
+    script_path = os.path.join(str(Path(__file__).parents[2]), "whokaryote_scripts/data", "get_euk_prok_fasta.sh")
    input_file = args.contigs
    output_file = os.path.join(args.outdir, "lin_contigs.fasta")
    euk_headers = os.path.join(args.outdir, "eukaryote_contig_headers.txt")

--- a/whokaryote_scripts/calculate_features.py
+++ b/whokaryote_scripts/calculate_features.py
@@ -134,7 +134,7 @@ def calc_features(contig_file, outfile):
                if gene_list != "empty":   # This is the gene list from the previous contig
                    if len(rbs_list) == 0:
                        rbs_ratio = np.nan
-                        print("RBS_list = 0", rbs_ratio)
+                        #  print("RBS_list = 0", rbs_ratio)
                    if len(rbs_list) > 0:
                        rbs_ratio = 1 - (rbs_list.count("None") / len(rbs_list))


--- a/whokaryote_scripts/predict_class.py
+++ b/whokaryote_scripts/predict_class.py
@@ -4,6 +4,9 @@ import joblib
 from pathlib import Path
 import os

+import warnings
+warnings.filterwarnings('ignore')
+

 def add_tiara(dataframe, outdir):
    tiara_list = []
@@ -78,9 +81,9 @@ def predict_class(feature_path, outdir, model):
    #  features = pd.get_dummies(feature_df)
    features = feature_df.dropna()
    #  features = np.array(features)
-    print("Used features:\n", features.describe())
-    print(features.shape)
-    print("Used model: ", model_file)
+    #  print("Used features:\n", features.describe())
+    #  print(features.shape)
+    #  print("Used model: ", model_file)
    loaded_rf = joblib.load(os.path.join(str(Path(__file__).parents[1]), "whokaryote_scripts/data", model_file))

    predictions = loaded_rf.predict(features)