Skip to content
Snippets Groups Projects
Commit fb27c1ca authored by Noordijk, Ben
Browse files

Fast5 mode of get_one_barcode.py now copies files directly instead of outputting the filenames

parent cbfdf435
No related branches found
No related tags found
1 merge request: !3 Added data preparation, hyperparameter optimisation, benchmarking code and k-mer library visualisation
......@@ -5,22 +5,23 @@ from pathlib import Path
import pandas as pd
from Bio import SeqIO
import argparse
import shutil
# Never truncate strings:
pd.set_option('display.max_colwidth', None)
def get_barcode_fast5_reads(barcode_id, sequencing_summary_path, sequencing_dir,
out_dir, worker_count):
"""Get list of all fast5 files that belong to a certain barcode
def copy_barcoded_fast5_reads(barcode_id, sequencing_summary_path, sequencing_dir,
out_dir, worker_count):
"""Copy all fast5 files that belong to a certain barcode to out_dir
:param barcode_id: barcode to extract, example 'barcode12'
:type barcode_id: str
:param sequencing_summary_path: path to sequencing_summary.txt
:param sequencing_dir: path to directory containing fast5 reads
:param out_dir: where to save output filenames and what to call the file
:param out_dir: where to save fast5 files
:param worker_count: how many processes to run in parallel
:return: list of fast 5 filenames
:return: list of fast5 filenames
"""
# Index all files
all_files = os.walk(sequencing_dir)
......@@ -38,9 +39,9 @@ def get_barcode_fast5_reads(barcode_id, sequencing_summary_path, sequencing_dir,
# Merge list of lists into one big list
files_of_read = itertools.chain.from_iterable(files_of_read)
print('Saving to file')
series = pd.Series(files_of_read)
series.to_csv(out_dir, index=False, header=False)
os.makedirs(out_dir, exist_ok=True)
for file in files_of_read:
shutil.copy(file, out_dir)
return files_of_read
......@@ -159,8 +160,8 @@ def main():
get_barcode_fastq_reads(barcode, seq_summary_path, read_path,
out_dir, worker_count)
elif args.input_type == 'fast5':
get_barcode_fast5_reads(barcode, seq_summary_path, read_path,
out_dir, worker_count)
copy_barcoded_fast5_reads(barcode, seq_summary_path, read_path,
out_dir, worker_count)
if __name__ == '__main__':
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment