diff --git a/src/main/java/nl/fairbydesign/backend/ncbi/NCBI_Matrices.java b/src/main/java/nl/fairbydesign/backend/ncbi/NCBI_Matrices.java
index c2cf18b2120081e7e3f180428f78d70f25f613b4..50795268af35feb28c1d609ce35045150b60835f 100644
--- a/src/main/java/nl/fairbydesign/backend/ncbi/NCBI_Matrices.java
+++ b/src/main/java/nl/fairbydesign/backend/ncbi/NCBI_Matrices.java
@@ -1,11 +1,9 @@
 package nl.fairbydesign.backend.ncbi;
 
 import com.fasterxml.jackson.annotation.JsonInclude;
-import com.fasterxml.jackson.core.JsonFactory;
-import com.fasterxml.jackson.core.JsonParser;
-import com.fasterxml.jackson.core.JsonToken;
 import com.fasterxml.jackson.core.type.TypeReference;
 import com.fasterxml.jackson.databind.*;
+import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.junit.Test;
 
@@ -17,11 +15,10 @@ import java.util.*;
 
 public class NCBI_Matrices {
     static String startTime = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss"));
-    static String targetDir = "./json/all";
 
     @Test
     public void attributeAbundanceMatrix() throws IOException {
-        File root = new File(targetDir+"/fetch");
+        File root = new File("./json/all/fetch");
         // Map<projectID, Set[attributes]>
         Map<String, Set<String>> attributeMap = new HashMap<>();
         Set<String> attributes = new HashSet<>();
@@ -56,11 +53,11 @@ public class NCBI_Matrices {
             }
         }
         System.out.println("processed "+counter+" files in total");
-        writeAttributes(attributes);
+        AAMWriteAttributes(attributes);
         AAMtoCSV(attributes, attributeMap);
     }
 
-    public Set<String> getAllNodeKeys(String json) throws IOException {
+    private Set<String> getAllNodeKeys(String json) throws IOException {
         ObjectMapper objectMapper = new ObjectMapper();
         objectMapper.configure(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY, true);
         objectMapper.configure(MapperFeature.ACCEPT_CASE_INSENSITIVE_PROPERTIES, true);
@@ -97,7 +94,7 @@ public class NCBI_Matrices {
      * @param attributeMap     A HashMap of HashSets with projectID as keys
      */
     private void AAMtoCSV(Set<String> sampleAttributes, Map<String, Set<String>> attributeMap) {
-        String folder = targetDir+"/parsings/";
+        String folder = "./json/all/parsings/";
         boolean isFolderCreated = new File(folder).exists() || new File(folder).mkdirs();
         File file = new File(folder+"AAM"+startTime+".csv");
 
@@ -137,7 +134,7 @@ public class NCBI_Matrices {
     /**
      * @param sampleAttributes  A HashSet of possible attributes
      */
-    private void writeAttributes (Set<String> sampleAttributes) {
+    private void AAMWriteAttributes(Set<String> sampleAttributes) {
         String folder = "./nlp/Input/";
         boolean isFolderCreated = new File(folder).exists() || new File(folder).mkdirs();
         File file = new File(folder+"AllAttributes_Raw.txt");
@@ -157,4 +154,17 @@ public class NCBI_Matrices {
         }
     }
 
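+    /**
+     * Reads the cleaned attribute names produced by {@link NCBI_NLP#cleanInput()} and prints
+     * the cleaned form of each attribute, as groundwork for an attribute similarity matrix.
+     */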
+    @Test
+    public void similarityMatrix() {
+        File inputFile = new File("./nlp/Output/AllAttributes_Cleaned.csv");
+        try (Scanner scanner = new Scanner(inputFile)) {
+            // each line holds "rawAttribute,cleanedAttribute"; print the cleaned name
+            while (scanner.hasNextLine()) {
+                System.out.println(scanner.nextLine().split(",")[1]);
+            }
+        } catch (FileNotFoundException e) {
+            System.out.println("Could not find file: " + inputFile);
+        }
+    }
+
 }
diff --git a/src/main/java/nl/fairbydesign/backend/ncbi/NCBI_NLP.java b/src/main/java/nl/fairbydesign/backend/ncbi/NCBI_NLP.java
index abf245fe8af97ea11fc9f98971e84fe9a27abe59..bce6c861df9fc92ea012b32dd5b1753ee99e8789 100644
--- a/src/main/java/nl/fairbydesign/backend/ncbi/NCBI_NLP.java
+++ b/src/main/java/nl/fairbydesign/backend/ncbi/NCBI_NLP.java
@@ -9,6 +9,8 @@ import org.junit.Test;
 
 import java.io.*;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
 import java.util.Scanner;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -19,7 +21,7 @@ public class NCBI_NLP {
     public void cleanInput() {
         File file = new File("./nlp/Input/AllAttributes_Raw.txt");
         ArrayList<String> list = new ArrayList<>();
-        Pattern pattern = Pattern.compile("[^a-zA-Z\\d\\s]|\\s\\S\\s|\\s\\S$|^\\S\\s|\\s+");
+        Pattern pattern = Pattern.compile("[^a-zA-Z\\d\\s]|\\s+");
         try (Scanner scanner = new Scanner(file)) {
             while (scanner.hasNextLine()) {
                 String attribute = scanner.nextLine();
@@ -30,13 +32,13 @@ public class NCBI_NLP {
                     cleanedAtt = cleanedAtt.replaceAll(pattern.toString(), " ");
                     i = matcher.start() + 1;
                 }
-                list.add(attribute +" --> "+ cleanedAtt.trim());
+                list.add(attribute + "," + cleanedAtt.trim());
             }
         } catch (FileNotFoundException e) {
             System.out.println("Could not find file: " + file);
         }
 
-        File outputFile = new File("./nlp/Output/AllAttributes_Processed.txt");
+        File outputFile = new File("./nlp/Output/AllAttributes_Cleaned.csv");
         try {
             FileUtils.writeLines(outputFile, list, false);
         } catch (IOException e) {
@@ -47,6 +49,45 @@ public class NCBI_NLP {
         }
     }
 
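+    /**
+     * Computes the pairwise Levenshtein distance between all cleaned attribute names and writes
+     * the result to LevenshteinDistance.csv: a header row of attribute names followed by one row
+     * per attribute holding the distances of the upper triangle (the measure is symmetric).
+     */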
+    @Test
+    public void levenshteinMatrix() {
+        File inputFile = new File("./nlp/Output/AllAttributes_Cleaned.csv");
+        ArrayList<String> cleanedAttributes = new ArrayList<>();
+        try (Scanner scanner = new Scanner(inputFile)) {
+            while (scanner.hasNextLine()) {
+                cleanedAttributes.add(scanner.nextLine().split(",")[1]);
+            }
+        } catch (FileNotFoundException e) {
+            System.out.println("Could not find file: " + inputFile);
+        }
+
+        File outputFile = new File("./nlp/Output/LevenshteinDistance.csv");
+        int attrCount = cleanedAttributes.size();
+        try (FileWriter fw = new FileWriter(outputFile);
+             BufferedWriter bw = new BufferedWriter(fw);
+             PrintWriter out = new PrintWriter(bw))
+        {
+            out.println("," + String.join(",", cleanedAttributes));
+            for (int i = 0; i < attrCount; i++) {
+                String attr1 = cleanedAttributes.get(i);
+                StringBuilder row = new StringBuilder(attr1);
+                System.out.println("Processing all distances (" + (attrCount - i) + ") for: " + attr1);
+                for (int j = 0; j < attrCount; j++) {
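+                    // the distance matrix is symmetric, so only fill the upper triangle (j >= i)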
+                    if (j < i) {
+                        row.append(",");
+                    } else {
+                        String attr2 = cleanedAttributes.get(j);
+                        int distance = LevenshteinDistance.compute_Levenshtein_distance(attr1, attr2);
+                        row.append(",").append(distance);
+                    }
+                }
+                out.println(row);
+            }
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
     @Test
     public void testLemmatizer() throws IOException {
         String[] tokens = new String[]{"Most", "large", "cities", "in", "the", "US", "had",