From 331b865d49b75444e432604b0f4a1e23df7b9670 Mon Sep 17 00:00:00 2001
From: Eef Jonkheer <eef@L0148004.wurnet.nl>
Date: Fri, 18 Feb 2022 17:35:30 +0100
Subject: [PATCH] Print error message when antiSMASH input has no gene
 identifiers

---
 .../pantools/pangenome/Classification.java    |  2 +-
 .../pangenome/FunctionalAnnotations.java      | 34 +++++++------------
 2 files changed, 14 insertions(+), 22 deletions(-)

diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java b/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java
index d2d23fa4a..e33c3d291 100644
--- a/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java
@@ -709,7 +709,7 @@ public class Classification {
         ResourceIterator<Node> bgc_nodes = graphDb.findNodes(bgc_label);
         while (bgc_nodes.hasNext()) {
             Node bgc_node = bgc_nodes.next();
-            int[] address = (int[]) bgc_node.getProperty("address");
+            int[] address = (int[]) bgc_node.getProperty("address_genes");
             try_incr_hashmap(mrnas_with_annotations, address[0] +"", 1);
             try_incr_hashmap(mrnas_with_annotations, address[0] + "_" + address[1], 1);
             Iterable<Relationship> rels = bgc_node.getRelationships();
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/FunctionalAnnotations.java b/src/main/java/nl/wur/bif/pantools/pangenome/FunctionalAnnotations.java
index 80d18706e..d3bc7f37c 100644
--- a/src/main/java/nl/wur/bif/pantools/pangenome/FunctionalAnnotations.java
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/FunctionalAnnotations.java
@@ -5311,14 +5311,8 @@ public class FunctionalAnnotations {
                 String type = features.getJSONObject(j).getString("type");
                 JSONObject qualifiers = features.getJSONObject(j).getJSONObject("qualifiers");
                 if (type.equals("protocluster")) {
-                    //System.out.println(j + " " + type);
-
                     JSONArray product = qualifiers.getJSONArray("product");
                     in_cluster = true;
-                    //String[] loc_array = location.split(":");
-                    //String[] loc_array2 = loc_array[1].split("]");
-                    //cluster_start_pos = Integer.parseInt(loc_array[0].replace("[",""));
-                    //cluster_end_pos = Integer.parseInt(loc_array2[0].replace("]",""));
                     int[] cluster_start_stop_pos = start_stop_from_json_location(location);
                     cluster_start_pos = cluster_start_stop_pos[0];
                     cluster_end_pos = cluster_start_stop_pos[1];
@@ -5329,26 +5323,25 @@ public class FunctionalAnnotations {
                     } else { // multiple products for a cluster
                         cluster_type.append(",").append(cluster_product.toString());
                     }
-                    //System.out.println(j + " CLUSTER TYPE " + cluster_type);
                     if (!earlier_CDS.equals("")){ // for some clusters the CDS property comes before protocluster
-                        //System.out.println(" earlier " + earlier_CDS + " " + earlier_address[0] + " vs " + cluster_start_pos);
                         if (earlier_address[0] >= cluster_start_pos) {
-                            //System.out.println("ja mag");
                             gene_identiers.add(earlier_CDS);
                         }
                     }
                     continue;
                 } else if (type.equals("CDS")) {
-                    earlier_CDS = (String) qualifiers.getJSONArray("gene").get(0);
+                    try {
+                        earlier_CDS = (String) qualifiers.getJSONArray("gene").get(0);
+                    } catch (JSONException jse) {
+                        System.out.println("\n\nUnable to retrieve gene identifiers from the antiSMASH output.\n"
+                            + "This is most likely because antiSMASH was run without GFF file.\n");
+                        System.exit(1);
+                    }
                     earlier_address = start_stop_from_json_location(location);
                 }
                 int gene_end_pos;
                 if (in_cluster && type.equals("gene")) {
-                    //String[] loc_array = location.split(":");
-                    //String[] loc_array2 = loc_array[1].split("]");
                     String gene_id = (String) qualifiers.getJSONArray("ID").get(0);
-                    //int gene_start_pos = Integer.parseInt(loc_array[0].replace("[",""));
-                    //gene_end_pos = Integer.parseInt(loc_array2[0].replace("]",""));
                     int[] gene_start_stop_pos = start_stop_from_json_location(location);
                     int gene_start_pos = gene_start_stop_pos[0];
                     gene_end_pos = gene_start_stop_pos[1];
@@ -5369,11 +5362,9 @@ public class FunctionalAnnotations {
 
                 if (gene_end_pos >= cluster_end_pos) { // the current gene has a coordinate higher as the cluster
                     in_cluster = false;
-                    //System.out.println("END of cluster\n");
-                    if (gene_identiers.isEmpty()){
+                    if (gene_identiers.isEmpty()) {
                         continue;
                     }
-                    //String[] gene_array = gene_identiers.toString().replaceFirst(".$","").split(",");
                     ArrayList<Node> gene_node_list = retrieve_gene_nodes_from_ids(gene_identiers, gene_map, null, log_builder);
                     int[] bgc_address = create_gene_cluster_address(genome_nr, gene_node_list);
                     String cluster_type_str = cluster_type.toString();
@@ -5491,11 +5482,12 @@ public class FunctionalAnnotations {
             bgc_node.setProperty("genome", genome_nr);
             bgc_node.setProperty("sequence", bgc_address[1]);
             bgc_node.setProperty("address_genes", bgc_address);
-            if (cluster_start_pos != 0) {
-                bgc_address[2] = cluster_start_pos;
-                bgc_address[3] = cluster_end_pos;
-                bgc_node.setProperty("address", bgc_address);
+            if (cluster_start_pos == 0) {
+                cluster_start_pos ++;
             }
+            bgc_address[2] = cluster_start_pos;
+            bgc_address[3] = cluster_end_pos;
+            bgc_node.setProperty("address", bgc_address);
             bgc_node.setProperty("annotation_id", annotation_id);
             int position = 1;
             for (Node gene_node : gene_node_list) {
-- 
GitLab