Skip to content
Snippets Groups Projects
Commit 5a542df5 authored by Louwen, Joris
Browse files

Unify remove_infr_doms functions

parent 6cdcecdf
Branches
Tags
1 merge request: !1 "iPRESTO refactor"
...@@ -18,6 +18,10 @@ from sys import argv ...@@ -18,6 +18,10 @@ from sys import argv
import logging import logging
from typing import Union, List from typing import Union, List
import time import time
import os
# to account for a weird bug with ldamulticore and numpy:
# https://github.com/RaRe-Technologies/gensim/issues/1988
os.environ['OMP_NUM_THREADS'] = '1'
def get_commands(): def get_commands():
...@@ -428,7 +432,7 @@ if __name__ == "__main__": ...@@ -428,7 +432,7 @@ if __name__ == "__main__":
bgc_classes_dict = {bgc: 'None' for bgc in bgcs} bgc_classes_dict = {bgc: 'None' for bgc in bgcs}
if not cmd.top_motifs_model: if not cmd.top_motifs_model:
bgcs = remove_infr_doms_str(bgcs, cmd.min_genes, False, bgcs = remove_infr_doms_str(bgcs, cmd.min_genes, cmd.verbose,
cmd.remove_genes_below_count) cmd.remove_genes_below_count)
if cmd.amplify: if cmd.amplify:
......
...@@ -707,7 +707,7 @@ def remove_infr_doms(clusdict, m_gens, verbose, cutoff=3): ...@@ -707,7 +707,7 @@ def remove_infr_doms(clusdict, m_gens, verbose, cutoff=3):
clusdict: dict of {cluster:[(domains_of_a_gene)]} clusdict: dict of {cluster:[(domains_of_a_gene)]}
m_gens: int, minimal distinct genes a cluster must have to be included m_gens: int, minimal distinct genes a cluster must have to be included
verbose: bool, if True print additional info verbose: bool, if True print additional info
cutoff: int, remove genes (domain cominations) that occur below this cutoff cutoff: int, remove genes (domain combinations) that occur less then cutoff
""" """
print(f'\nRemoving domain combinations that occur less than {cutoff} ' print(f'\nRemoving domain combinations that occur less than {cutoff} '
'times') 'times')
......
...@@ -51,7 +51,7 @@ def remove_infr_doms_str(clusdict, m_gens, verbose, cutoff=3): ...@@ -51,7 +51,7 @@ def remove_infr_doms_str(clusdict, m_gens, verbose, cutoff=3):
domcounter = Counter() domcounter = Counter()
domcounter.update([v for vals in clusdict.values() for v in vals domcounter.update([v for vals in clusdict.values() for v in vals
if not v == '-']) if not v == '-'])
deldoms = {key for key in domcounter if domcounter[key] <= cutoff} deldoms = {key for key in domcounter if domcounter[key] < cutoff}
print(' {} domain combinations are left, {} are removed'.format( print(' {} domain combinations are left, {} are removed'.format(
len(domcounter.keys()) - len(deldoms), len(deldoms))) len(domcounter.keys()) - len(deldoms), len(deldoms)))
clus_no_deldoms = {} clus_no_deldoms = {}
...@@ -126,11 +126,11 @@ def run_lda(domlist, no_below, no_above, num_topics, cores, outfolder, ...@@ -126,11 +126,11 @@ def run_lda(domlist, no_below, no_above, num_topics, cores, outfolder,
# coherence = cm.get_coherence() # coherence = cm.get_coherence()
# print('Coherence: {}, num_topics: {}'.format(coherence, num_topics)) # print('Coherence: {}, num_topics: {}'.format(coherence, num_topics))
if ldavis: if ldavis:
visname = os.path.join(outfolder, 'lda.html') visname = os.path.join(outfolder, 'lda_method-tsne.html')
print('Running pyLDAvis for visualisation') print('Running pyLDAvis for visualisation')
vis = pyLDAvis.gensim.prepare( vis = pyLDAvis.gensim.prepare(
lda, corpus_bow, dict_lda, sort_topics=False) lda, corpus_bow, dict_lda, sort_topics=False, mds='tsne')
print(' saving visualisation to html') print(' saving visualisation with t-sne to html')
pyLDAvis.save_html(vis, visname) pyLDAvis.save_html(vis, visname)
return lda, dict_lda, corpus_bow return lda, dict_lda, corpus_bow
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment