Skip to content
Snippets Groups Projects
Commit 22b33bab authored by Lannoy, Carlos de's avatar Lannoy, Carlos de
Browse files

small fixes abundance estimation

parent 3f6cd45b
No related branches found
No related tags found
No related merge requests found
......@@ -3,6 +3,7 @@ import argparse, subprocess, re
from os.path import basename, splitext
from tempfile import TemporaryDirectory
import numpy as np
import pandas as pd
COMPLEMENT_DICT = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
......@@ -52,7 +53,7 @@ kmer_list = []
count_df = pd.concat([target_df] + bg_df_list, axis=1)
for bg_fn in bg_fn_list:
count_df.loc[:, f'ratio_{bg_fn}'] = count_df.target / count_df.loc[:, bg_fn]
ratio_series = count_df.loc[:, f'ratio_{bg_fn}'].sort_values()
ratio_series = count_df.loc[~np.in1d(count_df.index, kmer_list), f'ratio_{bg_fn}'].sort_values()
kmer_list.extend(ratio_series.iloc[-kmers_per_bg:].index)
kmer_list.extend(ratio_series.iloc[:kmers_per_bg].index)
......
......@@ -60,8 +60,9 @@ def main(args):
read_manager_process.join()
print(f'rutime was {run_time.seconds} s')
if abundance_mode:
abundance_array = abundance_array / max(abundance_array.sum(), 1)
abundance_txt = 'kmer,frequency\n' + '\n'.join([f'{km},{ab}' for km, ab in zip(kmer_list, abundance_array)])
freq_array = abundance_array / max(abundance_array.sum(), 1)
abundance_txt = 'kmer,abundance,frequency\n' + \
'\n'.join([f'{km},{ab}' for km, ab in zip(kmer_list, abundance_array, freq_array)])
with open(f'{args.out_dir}abundance_estimation.csv', 'w') as fh:
fh.write(abundance_txt)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment