Skip to content
Snippets Groups Projects
Commit 6c8a27b7 authored by Martijn Landman
Browse files

Minor cleaning and streamlining

parent 2983f744
No related branches found
No related tags found
No related merge requests found
...@@ -16,81 +16,9 @@ import java.time.format.DateTimeFormatter; ...@@ -16,81 +16,9 @@ import java.time.format.DateTimeFormatter;
import java.util.*; import java.util.*;
public class NCBI_Matrices { public class NCBI_Matrices {
static String newline = System.getProperty("line.separator");
static String startTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss")); static String startTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss"));
static String targetDir = "./json/all"; static String targetDir = "./json/all";
/**
 * Counts, per TAG, how often each VALUE occurs in the "SAMPLE_ATTRIBUTE" entries of every JSON
 * file found two levels below {@code targetDir + "/fetch"}, then prints the counts to standard
 * output and writes the same report to
 * {@code targetDir + "/parsings/sampleAttributeValueCount" + startTime + ".txt"}.
 *
 * <p>If the first token of any file is not the start of a JSON object, the run stops
 * immediately and no report is written.
 *
 * @throws IOException if an input file cannot be read or parsed, or if the output directory or
 *                     report file cannot be created
 */
@Test
public void sampleAttributeValueCount() throws IOException {
    File root = new File(targetDir + "/fetch");
    Map<String, Map<String, Integer>> SAMap = new HashMap<>();
    // A single factory can create the parsers for all files; no need to rebuild it per file.
    JsonFactory jsonFactory = new MappingJsonFactory();
    int fileCounter = 0;
    for (File dir : Objects.requireNonNull(root.listFiles())) {
        for (File bpFile : Objects.requireNonNull(dir.listFiles())) {
            fileCounter++;
            // try-with-resources releases the file handle on every exit path,
            // including the early return below and any parsing exception.
            try (JsonParser jsonParser = jsonFactory.createParser(bpFile)) {
                JsonToken current = jsonParser.nextToken();
                if (current != JsonToken.START_OBJECT) {
                    System.out.println("ERROR: root should be object: quiting.");
                    return;
                }
                // Tokens are consumed in pairs: the loop condition advances to a token whose
                // current name is read, then the following token is inspected as its value.
                while (jsonParser.nextToken() != null) {
                    String fieldName = jsonParser.getCurrentName();
                    current = jsonParser.nextToken();
                    if (Objects.equals(fieldName, "SAMPLE_ATTRIBUTE")) {
                        if (current == JsonToken.START_ARRAY) {
                            // Several attributes: each array element is read as a tree and counted.
                            while (jsonParser.nextToken() != JsonToken.END_ARRAY) {
                                SAVCAppendMap(SAMap, jsonParser.readValueAsTree());
                            }
                        } else if (current == JsonToken.START_OBJECT) {
                            // A single attribute stored directly as an object.
                            SAVCAppendMap(SAMap, jsonParser.readValueAsTree());
                        } else if (current != JsonToken.END_ARRAY && current != JsonToken.END_OBJECT) {
                            System.out.println("Error: " + current.name() + " in " + jsonParser.getCurrentName() + ": skipping to next in file: " + bpFile.getAbsolutePath());
                            jsonParser.skipChildren();
                        }
                    }
                }
            }
        }
    }

    String dir = targetDir + "/parsings";
    File outputDir = new File(dir);
    // Fail with a clear message instead of ignoring an unsuccessful mkdirs().
    if (!outputDir.exists() && !outputDir.mkdirs()) {
        throw new IOException("Could not create output directory: " + outputDir.getAbsolutePath());
    }
    // try-with-resources flushes and closes the report even if writing is interrupted.
    try (PrintWriter writer = new PrintWriter(new FileWriter(dir + "/sampleAttributeValueCount" + startTime + ".txt"))) {
        System.out.println("Processed " + fileCounter + " files.");
        for (Map.Entry<String, Map<String, Integer>> tag : SAMap.entrySet()) {
            System.out.println(tag.getKey() + " => ");
            writer.println(tag.getKey() + " => ");
            for (Map.Entry<String, Integer> entry : tag.getValue().entrySet()) {
                System.out.println("\t" + entry.getKey() + " => " + entry.getValue());
                writer.println("\t" + entry.getKey() + " => " + entry.getValue());
            }
        }
    }
}
/**
 * Adds one occurrence of a sample attribute's VALUE under its TAG to the counter map.
 *
 * <p>A node that is null, or that has no "TAG" or no "VALUE" field, is reported on standard
 * output and skipped, so a single malformed attribute does not abort the whole count with a
 * {@link NullPointerException}.
 *
 * @param SAMap    nested counter (TAG to VALUE to number of occurrences); updated in place
 * @param jsonNode the attribute node to count
 */
private void SAVCAppendMap(Map<String, Map<String, Integer>> SAMap, JsonNode jsonNode) {
    JsonNode tagNode = jsonNode == null ? null : jsonNode.get("TAG");
    JsonNode valueNode = jsonNode == null ? null : jsonNode.get("VALUE");
    if (tagNode == null || valueNode == null) {
        System.out.println("Error: SAMPLE_ATTRIBUTE without TAG or VALUE: skipping: " + jsonNode);
        return;
    }
    // Create the per-tag map on first sight, then insert 1 or add 1 to the existing count.
    SAMap.computeIfAbsent(tagNode.asText(), key -> new HashMap<>())
            .merge(valueNode.asText(), 1, Integer::sum);
}
@Test @Test
public void attributeAbundanceMatrix() throws IOException { public void attributeAbundanceMatrix() throws IOException {
File root = new File(targetDir+"/fetch"); File root = new File(targetDir+"/fetch");
......
...@@ -8,8 +8,6 @@ import org.apache.commons.io.FileUtils; ...@@ -8,8 +8,6 @@ import org.apache.commons.io.FileUtils;
import org.junit.Test; import org.junit.Test;
import java.io.*; import java.io.*;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Scanner; import java.util.Scanner;
import java.util.regex.Matcher; import java.util.regex.Matcher;
...@@ -18,7 +16,7 @@ import java.util.regex.Pattern; ...@@ -18,7 +16,7 @@ import java.util.regex.Pattern;
public class NCBI_NLP { public class NCBI_NLP {
@Test @Test
public void cleanInput() throws IOException { public void cleanInput() {
File file = new File("./nlp/Input/AllAttributes_Raw.txt"); File file = new File("./nlp/Input/AllAttributes_Raw.txt");
ArrayList<String> list = new ArrayList<>(); ArrayList<String> list = new ArrayList<>();
Pattern pattern = Pattern.compile("[^a-zA-Z\\d\\s]|\\s\\S\\s|\\s\\S$|^\\S\\s|\\s+"); Pattern pattern = Pattern.compile("[^a-zA-Z\\d\\s]|\\s\\S\\s|\\s\\S$|^\\S\\s|\\s+");
...@@ -39,7 +37,14 @@ public class NCBI_NLP { ...@@ -39,7 +37,14 @@ public class NCBI_NLP {
} }
File outputFile = new File("./nlp/Output/AllAttributes_Processed.txt"); File outputFile = new File("./nlp/Output/AllAttributes_Processed.txt");
FileUtils.writeLines(outputFile, list, false); try {
FileUtils.writeLines(outputFile, list, false);
} catch (IOException e) {
System.out.println(e +"\nFailed to save results to file, see error message above.\nResults are instead provided below:\n");
for (String line : list) {
System.out.println(line);
}
}
} }
@Test @Test
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment