From 331b865d49b75444e432604b0f4a1e23df7b9670 Mon Sep 17 00:00:00 2001 From: Eef Jonkheer <eef@L0148004.wurnet.nl> Date: Fri, 18 Feb 2022 17:35:30 +0100 Subject: [PATCH] Print error message when no gene identifiers in antismash input --- .../pantools/pangenome/Classification.java | 2 +- .../pangenome/FunctionalAnnotations.java | 34 +++++++------------ 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java b/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java index d2d23fa4a..e33c3d291 100644 --- a/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java +++ b/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java @@ -709,7 +709,7 @@ public class Classification { ResourceIterator<Node> bgc_nodes = graphDb.findNodes(bgc_label); while (bgc_nodes.hasNext()) { Node bgc_node = bgc_nodes.next(); - int[] address = (int[]) bgc_node.getProperty("address"); + int[] address = (int[]) bgc_node.getProperty("address_genes"); try_incr_hashmap(mrnas_with_annotations, address[0] +"", 1); try_incr_hashmap(mrnas_with_annotations, address[0] + "_" + address[1], 1); Iterable<Relationship> rels = bgc_node.getRelationships(); diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/FunctionalAnnotations.java b/src/main/java/nl/wur/bif/pantools/pangenome/FunctionalAnnotations.java index 80d18706e..d3bc7f37c 100644 --- a/src/main/java/nl/wur/bif/pantools/pangenome/FunctionalAnnotations.java +++ b/src/main/java/nl/wur/bif/pantools/pangenome/FunctionalAnnotations.java @@ -5311,14 +5311,8 @@ public class FunctionalAnnotations { String type = features.getJSONObject(j).getString("type"); JSONObject qualifiers = features.getJSONObject(j).getJSONObject("qualifiers"); if (type.equals("protocluster")) { - //System.out.println(j + " " + type); - JSONArray product = qualifiers.getJSONArray("product"); in_cluster = true; - //String[] loc_array = location.split(":"); - //String[] loc_array2 = loc_array[1].split("]"); - //cluster_start_pos = Integer.parseInt(loc_array[0].replace("[","")); - //cluster_end_pos = Integer.parseInt(loc_array2[0].replace("]","")); int[] cluster_start_stop_pos = start_stop_from_json_location(location); cluster_start_pos = cluster_start_stop_pos[0]; cluster_end_pos = cluster_start_stop_pos[1]; @@ -5329,26 +5323,25 @@ public class FunctionalAnnotations { } else { // multiple products for a cluster cluster_type.append(",").append(cluster_product.toString()); } - //System.out.println(j + " CLUSTER TYPE " + cluster_type); if (!earlier_CDS.equals("")){ // for some clusters the CDS property comes before protocluster - //System.out.println(" earlier " + earlier_CDS + " " + earlier_address[0] + " vs " + cluster_start_pos); if (earlier_address[0] >= cluster_start_pos) { - //System.out.println("ja mag"); gene_identiers.add(earlier_CDS); } } continue; } else if (type.equals("CDS")) { - earlier_CDS = (String) qualifiers.getJSONArray("gene").get(0); + try { + earlier_CDS = (String) qualifiers.getJSONArray("gene").get(0); + } catch (JSONException jse) { + System.out.println("\n\nUnable to retrieve gene identifiers from the antiSMASH output.\n" + + "This is most likely because antiSMASH was run without GFF file.\n"); + System.exit(1); + } earlier_address = start_stop_from_json_location(location); } int gene_end_pos; if (in_cluster && type.equals("gene")) { - //String[] loc_array = location.split(":"); - //String[] loc_array2 = loc_array[1].split("]"); String gene_id = (String) qualifiers.getJSONArray("ID").get(0); - //int gene_start_pos = Integer.parseInt(loc_array[0].replace("[","")); - //gene_end_pos = Integer.parseInt(loc_array2[0].replace("]","")); int[] gene_start_stop_pos = start_stop_from_json_location(location); int gene_start_pos = gene_start_stop_pos[0]; gene_end_pos = gene_start_stop_pos[1]; @@ -5369,11 +5362,9 @@ public class FunctionalAnnotations { if (gene_end_pos >= cluster_end_pos) { // the current gene has a coordinate higher as the cluster in_cluster = false; - //System.out.println("END of cluster\n"); - if (gene_identiers.isEmpty()){ + if (gene_identiers.isEmpty()) { continue; } - //String[] gene_array = gene_identiers.toString().replaceFirst(".$","").split(","); ArrayList<Node> gene_node_list = retrieve_gene_nodes_from_ids(gene_identiers, gene_map, null, log_builder); int[] bgc_address = create_gene_cluster_address(genome_nr, gene_node_list); String cluster_type_str = cluster_type.toString(); @@ -5491,11 +5482,12 @@ public class FunctionalAnnotations { bgc_node.setProperty("genome", genome_nr); bgc_node.setProperty("sequence", bgc_address[1]); bgc_node.setProperty("address_genes", bgc_address); - if (cluster_start_pos != 0) { - bgc_address[2] = cluster_start_pos; - bgc_address[3] = cluster_end_pos; - bgc_node.setProperty("address", bgc_address); + if (cluster_start_pos == 0) { + cluster_start_pos ++; } + bgc_address[2] = cluster_start_pos; + bgc_address[3] = cluster_end_pos; + bgc_node.setProperty("address", bgc_address); bgc_node.setProperty("annotation_id", annotation_id); int position = 1; for (Node gene_node : gene_node_list) { -- GitLab