Skip to content
Snippets Groups Projects
Commit 15f60d2d authored by Jasper Koehorst
Browse files

add all keys to each sample and sort the biom file to ensure that metadata is ordered

parent 5f573599
No related branches found
No related tags found
No related merge requests found
......@@ -173,6 +173,8 @@ def biom_preformatter():
def tsv_to_biom():
from biom import load_table
from collections import OrderedDict
# merged.tsv
biom_content = load_table("merged.tsv")
......@@ -187,18 +189,35 @@ def tsv_to_biom():
# Skip lines that are not in this biom object
if identifier not in sample_ids: continue
if identifier not in metadata:
metadata[identifier] = {}
metadata[identifier] = OrderedDict()
metadata[identifier][key] = value
# Bug in biom reader, all metadata need to have the same keys in the same order
keys = set()
for line in open("metadata.tsv"):
line = line.strip()
if "\t" not in line: continue
# print(line)
identifier, key, value = line.split("\t")
keys.add(key)
keys = sorted(keys)
for identifier in metadata:
for key in keys:
if key not in metadata[identifier]:
metadata[identifier][key] = "None"
# metadata[identifier] = sorted(metadata[identifier])
# Add metadata
biom_content.add_metadata(metadata)
biom_content.type = "OTU table"
json_data = biom_content.to_json(generated_by="UNLOCK conversion module")
# Create Python object from JSON string data
obj = json.loads(json_data)
# Pretty Print JSON
json_formatted_str = json.dumps(obj, indent=4)
json_formatted_str = json.dumps(obj, indent=4, sort_keys=True)
biom_file = job.split("/")[-1].replace(".job", ".biom")
print("Writing biom file to", biom_file)
print(json_formatted_str, file=open(biom_file, "w"))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment