From 5a542df52f6728704e49833bfb3998df81bb4d98 Mon Sep 17 00:00:00 2001
From: louwe015 <joris.louwen@wur.nl>
Date: Mon, 14 Mar 2022 15:59:34 +0100
Subject: [PATCH] Unify remove_infr_doms functions

---
 ipresto.py                         | 6 +++++-
 ipresto/presto_stat/presto_stat.py | 2 +-
 ipresto/presto_top/presto_top.py   | 8 ++++----
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/ipresto.py b/ipresto.py
index 6f027fb..8dcc61c 100644
--- a/ipresto.py
+++ b/ipresto.py
@@ -18,6 +18,10 @@ from sys import argv
 import logging
 from typing import Union, List
 import time
+import os
+# limit OpenMP to one thread to work around an ldamulticore/numpy bug:
+# https://github.com/RaRe-Technologies/gensim/issues/1988
+os.environ['OMP_NUM_THREADS'] = '1'
 
 
 def get_commands():
@@ -428,7 +432,7 @@ if __name__ == "__main__":
         bgc_classes_dict = {bgc: 'None' for bgc in bgcs}
 
     if not cmd.top_motifs_model:
-        bgcs = remove_infr_doms_str(bgcs, cmd.min_genes, False,
+        bgcs = remove_infr_doms_str(bgcs, cmd.min_genes, cmd.verbose,
                                     cmd.remove_genes_below_count)
 
     if cmd.amplify:
diff --git a/ipresto/presto_stat/presto_stat.py b/ipresto/presto_stat/presto_stat.py
index 1b0195b..39dabb2 100644
--- a/ipresto/presto_stat/presto_stat.py
+++ b/ipresto/presto_stat/presto_stat.py
@@ -707,7 +707,7 @@ def remove_infr_doms(clusdict, m_gens, verbose, cutoff=3):
     clusdict: dict of {cluster:[(domains_of_a_gene)]}
     m_gens: int, minimal distinct genes a cluster must have to be included
     verbose: bool, if True print additional info
-    cutoff: int, remove genes (domain cominations) that occur below this cutoff
+    cutoff: int, remove genes (domain combinations) that occur less than cutoff
     """
     print(f'\nRemoving domain combinations that occur less than {cutoff} '
           'times')
diff --git a/ipresto/presto_top/presto_top.py b/ipresto/presto_top/presto_top.py
index 0cacaf7..f6328e4 100644
--- a/ipresto/presto_top/presto_top.py
+++ b/ipresto/presto_top/presto_top.py
@@ -51,7 +51,7 @@ def remove_infr_doms_str(clusdict, m_gens, verbose, cutoff=3):
     domcounter = Counter()
     domcounter.update([v for vals in clusdict.values() for v in vals
                        if not v == '-'])
-    deldoms = {key for key in domcounter if domcounter[key] <= cutoff}
+    deldoms = {key for key in domcounter if domcounter[key] < cutoff}
     print('  {} domain combinations are left, {} are removed'.format(
         len(domcounter.keys()) - len(deldoms), len(deldoms)))
     clus_no_deldoms = {}
@@ -126,11 +126,11 @@ def run_lda(domlist, no_below, no_above, num_topics, cores, outfolder,
     # coherence = cm.get_coherence()
     # print('Coherence: {}, num_topics: {}'.format(coherence, num_topics))
     if ldavis:
-        visname = os.path.join(outfolder, 'lda.html')
+        visname = os.path.join(outfolder, 'lda_method-tsne.html')
         print('Running pyLDAvis for visualisation')
         vis = pyLDAvis.gensim.prepare(
-            lda, corpus_bow, dict_lda, sort_topics=False)
-        print('  saving visualisation to html')
+            lda, corpus_bow, dict_lda, sort_topics=False, mds='tsne')
+        print('  saving visualisation with t-sne to html')
         pyLDAvis.save_html(vis, visname)
     return lda, dict_lda, corpus_bow
 
-- 
GitLab