From 5a542df52f6728704e49833bfb3998df81bb4d98 Mon Sep 17 00:00:00 2001 From: louwe015 <joris.louwen@wur.nl> Date: Mon, 14 Mar 2022 15:59:34 +0100 Subject: [PATCH] Unify remove_infr_doms functions --- ipresto.py | 6 +++++- ipresto/presto_stat/presto_stat.py | 2 +- ipresto/presto_top/presto_top.py | 8 ++++---- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/ipresto.py b/ipresto.py index 6f027fb..8dcc61c 100644 --- a/ipresto.py +++ b/ipresto.py @@ -18,6 +18,10 @@ from sys import argv import logging from typing import Union, List import time +import os +# to account for a weird bug with ldamulticore and numpy: +# https://github.com/RaRe-Technologies/gensim/issues/1988 +os.environ['OMP_NUM_THREADS'] = '1' def get_commands(): @@ -428,7 +432,7 @@ if __name__ == "__main__": bgc_classes_dict = {bgc: 'None' for bgc in bgcs} if not cmd.top_motifs_model: - bgcs = remove_infr_doms_str(bgcs, cmd.min_genes, False, + bgcs = remove_infr_doms_str(bgcs, cmd.min_genes, cmd.verbose, cmd.remove_genes_below_count) if cmd.amplify: diff --git a/ipresto/presto_stat/presto_stat.py b/ipresto/presto_stat/presto_stat.py index 1b0195b..39dabb2 100644 --- a/ipresto/presto_stat/presto_stat.py +++ b/ipresto/presto_stat/presto_stat.py @@ -707,7 +707,7 @@ def remove_infr_doms(clusdict, m_gens, verbose, cutoff=3): clusdict: dict of {cluster:[(domains_of_a_gene)]} m_gens: int, minimal distinct genes a cluster must have to be included verbose: bool, if True print additional info - cutoff: int, remove genes (domain cominations) that occur below this cutoff + cutoff: int, remove genes (domain combinations) that occur less than cutoff """ print(f'\nRemoving domain combinations that occur less than {cutoff} ' 'times') diff --git a/ipresto/presto_top/presto_top.py b/ipresto/presto_top/presto_top.py index 0cacaf7..f6328e4 100644 --- a/ipresto/presto_top/presto_top.py +++ b/ipresto/presto_top/presto_top.py @@ -51,7 +51,7 @@ def remove_infr_doms_str(clusdict, m_gens, verbose, cutoff=3): 
domcounter = Counter() domcounter.update([v for vals in clusdict.values() for v in vals if not v == '-']) - deldoms = {key for key in domcounter if domcounter[key] <= cutoff} + deldoms = {key for key in domcounter if domcounter[key] < cutoff} print(' {} domain combinations are left, {} are removed'.format( len(domcounter.keys()) - len(deldoms), len(deldoms))) clus_no_deldoms = {} @@ -126,11 +126,11 @@ def run_lda(domlist, no_below, no_above, num_topics, cores, outfolder, # coherence = cm.get_coherence() # print('Coherence: {}, num_topics: {}'.format(coherence, num_topics)) if ldavis: - visname = os.path.join(outfolder, 'lda.html') + visname = os.path.join(outfolder, 'lda_method-tsne.html') print('Running pyLDAvis for visualisation') vis = pyLDAvis.gensim.prepare( - lda, corpus_bow, dict_lda, sort_topics=False) - print(' saving visualisation to html') + lda, corpus_bow, dict_lda, sort_topics=False, mds='tsne') + print(' saving visualisation with t-sne to html') pyLDAvis.save_html(vis, visname) return lda, dict_lda, corpus_bow -- GitLab