diff --git a/src/main/java/nl/fairbydesign/backend/ncbi/NCBI_Matrices.java b/src/main/java/nl/fairbydesign/backend/ncbi/NCBI_Matrices.java index 31b1d0507bacb1c044c164d794dfc7bb2d047115..c2cf18b2120081e7e3f180428f78d70f25f613b4 100644 --- a/src/main/java/nl/fairbydesign/backend/ncbi/NCBI_Matrices.java +++ b/src/main/java/nl/fairbydesign/backend/ncbi/NCBI_Matrices.java @@ -16,81 +16,9 @@ import java.time.format.DateTimeFormatter; import java.util.*; public class NCBI_Matrices { - static String newline = System.getProperty("line.separator"); static String startTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss")); static String targetDir = "./json/all"; - @Test - public void sampleAttributeValueCount() throws IOException { - File root = new File(targetDir+"/fetch"); - Map<String, Map<String, Integer>> SAMap = new HashMap<>(); - int fileCounter = 0; - - for (File dir : Objects.requireNonNull(root.listFiles())) { - for (File bpFile : Objects.requireNonNull(dir.listFiles())) { - fileCounter++; - - JsonFactory jsonFactory = new MappingJsonFactory(); - JsonParser jsonParser = jsonFactory.createParser(bpFile); - JsonToken current = jsonParser.nextToken(); - - if (current != JsonToken.START_OBJECT) { - System.out.println("ERROR: root should be object: quiting."); - return; - } - - while (jsonParser.nextToken() != null) { - String fieldName = jsonParser.getCurrentName(); - current = jsonParser.nextToken(); - if (Objects.equals(fieldName, "SAMPLE_ATTRIBUTE")) { - if (current == JsonToken.START_ARRAY) { - while (jsonParser.nextToken() != JsonToken.END_ARRAY) { - SAVCAppendMap(SAMap, jsonParser.readValueAsTree()); - } - } else if (current == JsonToken.START_OBJECT) { - SAVCAppendMap(SAMap, jsonParser.readValueAsTree()); - } else if (current != JsonToken.END_ARRAY && current != JsonToken.END_OBJECT) { - System.out.println("Error: " + current.name() + " in " + jsonParser.getCurrentName() + ": skipping to next in file: " + bpFile.getAbsolutePath()); - jsonParser.skipChildren(); - } -// } else { -// System.out.println("Unprocessed property: " + fieldName); - } - } - } - } - String dir = targetDir+"/parsings"; - boolean isDirCreated = new File(dir).exists() || new File(dir).mkdirs(); - PrintWriter writer = new PrintWriter(new FileWriter(dir+"/sampleAttributeValueCount"+startTime+".txt")); - System.out.println("Processed " + fileCounter + " files."); - for (Map.Entry<String, Map<String, Integer>> tag : SAMap.entrySet()) { - System.out.println(tag.getKey() + " => "); - writer.println(tag.getKey() + " => "); - for (Map.Entry<String, Integer> entry : tag.getValue().entrySet()) { - System.out.println("\t" + entry.getKey() + " => " + entry.getValue()); - writer.println("\t" + entry.getKey() + " => " + entry.getValue()); - } - } - writer.close(); - } - - private void SAVCAppendMap(Map<String, Map<String, Integer>> SAMap, JsonNode jsonNode) { - String tag = jsonNode.get("TAG").asText(); - String value = jsonNode.get("VALUE").asText(); - - Map<String, Integer> values = SAMap.get(tag); - if (values == null) { - values = new HashMap<>(); - values.put(value, 1); - SAMap.put(tag, values); - } else if (SAMap.get(tag).get(value) == null) { - SAMap.get(tag).put(value, 1); - } else { - Integer count = SAMap.get(tag).get(value); - SAMap.get(tag).put(value, ++count); - } - } - @Test public void attributeAbundanceMatrix() throws IOException { File root = new File(targetDir+"/fetch"); diff --git a/src/main/java/nl/fairbydesign/backend/ncbi/NCBI_NLP.java b/src/main/java/nl/fairbydesign/backend/ncbi/NCBI_NLP.java index 34100047add92c1454a38b4276ce1cfacf4a2fa0..abf245fe8af97ea11fc9f98971e84fe9a27abe59 100644 --- a/src/main/java/nl/fairbydesign/backend/ncbi/NCBI_NLP.java +++ b/src/main/java/nl/fairbydesign/backend/ncbi/NCBI_NLP.java @@ -8,8 +8,6 @@ import org.apache.commons.io.FileUtils; import org.junit.Test; import java.io.*; -import java.time.LocalDateTime; -import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Scanner; import java.util.regex.Matcher; @@ -18,7 +16,7 @@ import java.util.regex.Pattern; public class NCBI_NLP { @Test - public void cleanInput() throws IOException { + public void cleanInput() { File file = new File("./nlp/Input/AllAttributes_Raw.txt"); ArrayList<String> list = new ArrayList<>(); Pattern pattern = Pattern.compile("[^a-zA-Z\\d\\s]|\\s\\S\\s|\\s\\S$|^\\S\\s|\\s+"); @@ -39,7 +37,14 @@ public class NCBI_NLP { } File outputFile = new File("./nlp/Output/AllAttributes_Processed.txt"); - FileUtils.writeLines(outputFile, list, false); + try { + FileUtils.writeLines(outputFile, list, false); + } catch (IOException e) { + System.out.println(e +"\nFailed to save results to file, see error message above.\nResults are instead provided below:\n"); + for (String line : list) { + System.out.println(line); + } + } } @Test