From 15f60d2d62fdc6f4470e20010f72e2da11ef8275 Mon Sep 17 00:00:00 2001 From: Jasper Koehorst <> Date: Fri, 17 Sep 2021 10:37:49 +0200 Subject: [PATCH] add all keys to each sample and sort the biom file to ensure that metadata is ordered --- ngtax_to_biom.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/ngtax_to_biom.py b/ngtax_to_biom.py index dd14023..2d0b2f7 100755 --- a/ngtax_to_biom.py +++ b/ngtax_to_biom.py @@ -173,6 +173,8 @@ def biom_preformatter(): def tsv_to_biom(): from biom import load_table + from collections import OrderedDict + # merged.tsv biom_content = load_table("merged.tsv") @@ -187,18 +189,35 @@ def tsv_to_biom(): # Skip lines that are not in this biom object if identifier not in sample_ids: continue if identifier not in metadata: - metadata[identifier] = {} + metadata[identifier] = OrderedDict() metadata[identifier][key] = value + # Bug in biom reader, all metadata need to have the same keys in the same order + keys = set() + for line in open("metadata.tsv"): + line = line.strip() + if "\t" not in line: continue + # print(line) + identifier, key, value = line.split("\t") + keys.add(key) + keys = sorted(keys) + + for identifier in metadata: + for key in keys: + if key not in metadata[identifier]: + metadata[identifier][key] = "None" + # metadata[identifier] = sorted(metadata[identifier]) + + # Add metadata biom_content.add_metadata(metadata) - + biom_content.type = "OTU table" json_data = biom_content.to_json(generated_by="UNLOCK conversion module") # Create Python object from JSON string data obj = json.loads(json_data) # Pretty Print JSON - json_formatted_str = json.dumps(obj, indent=4) + json_formatted_str = json.dumps(obj, indent=4, sort_keys=True) biom_file = job.split("/")[-1].replace(".job", ".biom") print("Writing biom file to", biom_file) print(json_formatted_str, file=open(biom_file, "w")) -- GitLab