From 15f60d2d62fdc6f4470e20010f72e2da11ef8275 Mon Sep 17 00:00:00 2001
From: Jasper Koehorst <>
Date: Fri, 17 Sep 2021 10:37:49 +0200
Subject: [PATCH] add all keys to each sample and sort the biom file to ensure
 that metadata is ordered

---
 ngtax_to_biom.py | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/ngtax_to_biom.py b/ngtax_to_biom.py
index dd14023..2d0b2f7 100755
--- a/ngtax_to_biom.py
+++ b/ngtax_to_biom.py
@@ -173,6 +173,8 @@ def biom_preformatter():
 
 def tsv_to_biom():
     from biom import load_table
+    from collections import OrderedDict
+
     # merged.tsv
     biom_content = load_table("merged.tsv")
     
@@ -187,18 +189,35 @@ def tsv_to_biom():
         # Skip lines that are not in this biom object
         if identifier not in sample_ids: continue
         if identifier not in metadata:
-            metadata[identifier] = {}
+            metadata[identifier] = OrderedDict()
         metadata[identifier][key] = value
     
+    # Workaround for a bug in the biom reader: every sample's metadata must have the same keys in the same order
+    keys = set()
+    for line in open("metadata.tsv"):
+        line = line.strip()
+        if "\t" not in line: continue
+        # print(line)
+        identifier, key, value = line.split("\t")
+        keys.add(key)
+    keys = sorted(keys)
+
+    for identifier in metadata:
+        for key in keys:
+            if key not in metadata[identifier]:
+                metadata[identifier][key] = "None"
+        # metadata[identifier] = sorted(metadata[identifier])
+    
+    # Add metadata
     biom_content.add_metadata(metadata)
-
+    biom_content.type = "OTU table"
     json_data = biom_content.to_json(generated_by="UNLOCK conversion module")
     
     # Create Python object from JSON string data
     obj = json.loads(json_data)
     
     # Pretty Print JSON
-    json_formatted_str = json.dumps(obj, indent=4)
+    json_formatted_str = json.dumps(obj, indent=4, sort_keys=True)
     biom_file = job.split("/")[-1].replace(".job", ".biom")
     print("Writing biom file to", biom_file)
     print(json_formatted_str, file=open(biom_file, "w"))
-- 
GitLab