Commit 728eaa55 authored by Noordijk, Ben

Expanded uncalled accuracy evaluation script

parent 96817797
1 merge request: !3 Added data preparation, hyperparameter optimisation, benchmarking code and k-mer library visualisation
@@ -22,23 +22,37 @@ def read_paf(file_path):
     return pd.read_csv(file_path, sep='\t', names=headers, usecols=range(12))
-def main():
-    pd.options.display.width = 0
-    uncalled_paf_out_path = Path('/home/noord087/lustre_link/mscthesis/benchmarking/output_16s_files/uncalled')
-    ground_truth_path = Path('/home/noord087/lustre_link/HoiCarlos/16Sreads_mockcommunity/ground_truth_with_read_id.csv')
+def parse_uncalled(paf_path, ground_truth_path, plot_cm=False):
+    """Parse single paf output of uncalled
+
+    :param paf_path: path to .paf file output by uncalled. Should be mapped
+        to single species reference genome. In this case Pseudomonas aeruginosa.
+    :param ground_truth_path: Path to csv that contains ground truth, this csv
+        can be output by tools.set_ground_truths_of_reads.py
+    :param plot_cm: If set to true, plot confusion matrix
+    :type plot_cm: bool
+    :return: tuple with f1 score and accuracy
+    """
     ground_truth = pd.read_csv(ground_truth_path)
-    uncalled_paf = read_paf(uncalled_paf_out_path / 'uncalled_out_fold0.paf')
+    uncalled_paf = read_paf(paf_path)
     merged_df = uncalled_paf.merge(ground_truth, on='read id')
-    merged_df['y true'] = merged_df['species'].apply(lambda x: 1 if x.find('Pseudomonas aeruginosa') > 0 else 0)
-    merged_df['y pred'] = merged_df['Target sequence name'].apply(lambda x: 0 if x == '*' else 1)
-    f1 = f1_score(merged_df['y true'], merged_df['y pred'])
+    y_true = merged_df['species'].apply(lambda x: 1 if x.find('Pseudomonas aeruginosa') > 0 else 0)
+    y_pred = merged_df['Target sequence name'].apply(lambda x: 0 if x == '*' else 1)
+    f1 = f1_score(y_true, y_pred)
+    accuracy = accuracy_score(y_true, y_pred)
     print(f'{f1=}')
-    ConfusionMatrixDisplay.from_predictions(merged_df['y true'], merged_df['y pred'])
-    plt.show()
+    print(f'{accuracy=}')
+    if plot_cm:
+        ConfusionMatrixDisplay.from_predictions(y_true, y_pred)
+        plt.show()
+    return f1, accuracy
+
+
+def main():
+    pd.options.display.width = 0
+    uncalled_paf_out_path = Path('/home/noord087/lustre_link/mscthesis/benchmarking/output_16s_files/uncalled/uncalled_out_fold0.paf')
+    ground_truth_path = Path('/home/noord087/lustre_link/HoiCarlos/16Sreads_mockcommunity/ground_truth_with_read_id.csv')
+    parse_uncalled(uncalled_paf_out_path, ground_truth_path)
 if __name__ == '__main__':
......
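For context, a minimal sketch of how the refactored parse_uncalled could be reused across cross-validation folds rather than only fold 0. This is not part of the commit: the fold-file naming pattern (only uncalled_out_fold0.paf appears above), the fold count of 5, and the importing module name benchmark_uncalled are all assumptions.

    # Hypothetical usage sketch (not part of this commit): aggregate UNCALLED
    # benchmarking metrics over several cross-validation folds.
    from pathlib import Path

    import pandas as pd

    from benchmark_uncalled import parse_uncalled  # module name is assumed

    uncalled_dir = Path('/home/noord087/lustre_link/mscthesis/benchmarking/output_16s_files/uncalled')
    ground_truth_path = Path('/home/noord087/lustre_link/HoiCarlos/16Sreads_mockcommunity/ground_truth_with_read_id.csv')

    scores = []
    for fold in range(5):  # number of folds is an assumption
        paf_path = uncalled_dir / f'uncalled_out_fold{fold}.paf'  # naming pattern assumed from fold 0
        if not paf_path.exists():
            continue
        f1, accuracy = parse_uncalled(paf_path, ground_truth_path)
        scores.append({'fold': fold, 'f1': f1, 'accuracy': accuracy})

    # One row per fold with its F1 score and accuracy
    print(pd.DataFrame(scores).set_index('fold'))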