diff --git a/.gitignore b/.gitignore
index 4f447c05dd97a922e237c4a581fa17d4436b190b..31b6bc3df3dc3a553449555435dd5f37818712bb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -148,3 +148,7 @@ nbdist/
 .nb-gradle/
 
 # End of https://www.toptal.com/developers/gitignore/api/intellij,maven,netbeans
+
+# Sphinx build output and Python cache directories
+__pycache__
+output
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b3a354680b0240fa1b439fca98c2a823f84cedbb..4dd9c64c9d53e476846d124b75bef4a5d2981593 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -3,20 +3,50 @@
 image: harbor.containers.wurnet.nl/proxy-cache/library/maven:3.8.4-jdk-8-slim
 
 stages:
-  - test
-  - package
+  - test:unit
+  - test:end-to-end
 
-test:
-  stage: test
+unit-tests:
+  stage: test:unit
   script:
     - mvn test
 
-build-jars:
-  stage: package
+end-to-end-tests:yeast-1:
+  stage: test:end-to-end
+  variables:
+    KUBERNETES_MEMORY_REQUEST: 8Gi
+    KUBERNETES_MEMORY_LIMIT: 16Gi
   script:
-    - mvn package
-  only:
-    - tags
-  artifacts:
-    paths:
-      - target/*pantools-*.jar
+    - |
+      # Install system dependencies
+
+      apt-get update
+      # curl and ca-certificates may be missing from the slim base image
+      apt-get install --yes --no-install-recommends git curl ca-certificates
+
+      # Install Conda
+
+      MINICONDA_INSTALLER=$(mktemp)
+      curl -o "${MINICONDA_INSTALLER}" --silent "https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh"
+      bash "${MINICONDA_INSTALLER}" -b
+      rm "${MINICONDA_INSTALLER}"
+      export PATH="${PATH}:/root/miniconda3/bin/"
+
+      # Install Mamba for faster installation of remaining packages
+
+      conda install --yes --channel conda-forge "mamba=0.15.3"
+
+      # Install KMC, samtools and Snakemake with Mamba
+
+      mamba install --yes --channel conda-forge --channel bioconda \
+        "kmc=3.0.1" \
+        "samtools=1.15" \
+        "snakemake=7.19.1"
+
+      # Run end-to-end tests with Snakemake
+
+      cd tests/
+      snakemake \
+        --cores 1 \
+        --printshellcmds \
+        --debug \
+        --configfile yeast-1.yaml
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
index 82c296044014daa8f524dfe2ad014028d57d0b4d..440a96665e4e708bc1e39b4e2a664293d39b2395 100644
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -2,7 +2,7 @@
   <profile version="1.0">
     <option name="myName" value="Project Default" />
     <inspection_tool class="AutoCloseableResource" enabled="true" level="WARNING" enabled_by_default="true">
-      <option name="METHOD_MATCHER_CONFIG" value="java.util.Formatter,format,java.io.Writer,append,com.google.common.base.Preconditions,checkNotNull,org.hibernate.Session,close,java.io.PrintWriter,printf,java.io.PrintStream,printf,java.lang.ClassLoader,getResourceAsStream" />
+      <option name="METHOD_MATCHER_CONFIG" value="java.util.Formatter,format,java.io.Writer,append,com.google.common.base.Preconditions,checkNotNull,org.hibernate.Session,close,java.io.PrintWriter,printf,java.io.PrintStream,printf,jakarta.validation.Configuration,buildValidatorFactory" />
     </inspection_tool>
     <inspection_tool class="GrazieInspection" enabled="false" level="TYPO" enabled_by_default="false" />
     <inspection_tool class="JavaDoc" enabled="true" level="WARNING" enabled_by_default="true">
@@ -36,41 +36,5 @@
       <option name="IGNORE_POINT_TO_ITSELF" value="false" />
       <option name="myAdditionalJavadocTags" value="Param" />
     </inspection_tool>
-    <inspection_tool class="JavadocDeclaration" enabled="true" level="WARNING" enabled_by_default="true">
-      <option name="ADDITIONAL_TAGS" value="Param" />
-    </inspection_tool>
-    <inspection_tool class="MissingJavadoc" enabled="true" level="WARNING" enabled_by_default="true">
-      <option name="PACKAGE_SETTINGS">
-        <Options>
-          <option name="ENABLED" value="false" />
-        </Options>
-      </option>
-      <option name="MODULE_SETTINGS">
-        <Options>
-          <option name="ENABLED" value="false" />
-        </Options>
-      </option>
-      <option name="TOP_LEVEL_CLASS_SETTINGS">
-        <Options>
-          <option name="ENABLED" value="false" />
-        </Options>
-      </option>
-      <option name="INNER_CLASS_SETTINGS">
-        <Options>
-          <option name="ENABLED" value="false" />
-        </Options>
-      </option>
-      <option name="METHOD_SETTINGS">
-        <Options>
-          <option name="REQUIRED_TAGS" value="@return@param@throws or @exception" />
-          <option name="ENABLED" value="false" />
-        </Options>
-      </option>
-      <option name="FIELD_SETTINGS">
-        <Options>
-          <option name="ENABLED" value="false" />
-        </Options>
-      </option>
-    </inspection_tool>
   </profile>
 </component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 265d7c863fa6d5033edde2d8cad85998ae19702f..b3658151e90c0dfd21e9af4c2d2cded7f28ace92 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="EntryPointsManager">
-    <list size="12">
+    <list size="13">
       <item index="0" class="java.lang.String" itemvalue="nl.wur.bif.pantools.cli.validation.Constraints.InputDirectory" />
       <item index="1" class="java.lang.String" itemvalue="nl.wur.bif.pantools.cli.validation.Constraints.InputFile" />
       <item index="2" class="java.lang.String" itemvalue="nl.wur.bif.pantools.cli.validation.Constraints.InputFiles" />
@@ -10,10 +10,11 @@
       <item index="5" class="java.lang.String" itemvalue="nl.wur.bif.pantools.cli.validation.Constraints.MinOrZero" />
       <item index="6" class="java.lang.String" itemvalue="nl.wur.bif.pantools.cli.validation.Constraints.Patterns" />
       <item index="7" class="java.lang.String" itemvalue="nl.wur.bif.pantools.validation.Constraints.InputDirectory" />
-      <item index="8" class="java.lang.String" itemvalue="picocli.CommandLine.ArgGroup" />
-      <item index="9" class="java.lang.String" itemvalue="picocli.CommandLine.Mixin" />
-      <item index="10" class="java.lang.String" itemvalue="picocli.CommandLine.Option" />
-      <item index="11" class="java.lang.String" itemvalue="picocli.CommandLine.Parameters" />
+      <item index="8" class="java.lang.String" itemvalue="org.junit.jupiter.api.BeforeAll" />
+      <item index="9" class="java.lang.String" itemvalue="picocli.CommandLine.ArgGroup" />
+      <item index="10" class="java.lang.String" itemvalue="picocli.CommandLine.Mixin" />
+      <item index="11" class="java.lang.String" itemvalue="picocli.CommandLine.Option" />
+      <item index="12" class="java.lang.String" itemvalue="picocli.CommandLine.Parameters" />
     </list>
     <writeAnnotations>
       <writeAnnotation name="picocli.CommandLine.Mixin" />
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 6a943a7c921f27f030e2f78a253f8ada0460bf12..68b0eddaafaba3e1d7c44a606c1eadbd47f43db2 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,11 +3,17 @@ repos:
     hooks:
     - id: maven-compile
       name: Compile with Maven
-      description: Compile all code with mvn compile
+      description: 'Compile all code with mvn compile.'
       entry: mvn
       args:
         - compile
       language: system
       pass_filenames: false
       types: [java]
-
+    - id: sphinx-lint
+      name: Sphinx lint
+      description: 'Searches for common typos in Sphinx-flavored rst files.'
+      files: '\.rst$'
+      entry: sphinx-lint
+      args: [-e=all]
+      language: python
+      additional_dependencies: [sphinx-lint]
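+      # Run this hook on demand with: pre-commit run sphinx-lint --all-files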
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e432eab41e91f34ac0c320669d993a9153105d81..53163c38c5197f9a176ba94bf63c3e359071092e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,22 @@
 All notable changes to Pantools will be documented in this file.
 
+## [4.1.0] - 23-12-2022
+
+### Added
+- New log4j2 logger with console and file appenders (!85).
+- New globally accessible flags to regulate console logging output: silent, quiet, debug and trace (!85).
+- New feature, `export_pangenome`, to export a pangenome to a number of files for comparison with other pangenomes (!108).
+- Added a CI validation test for `build_pangenome` and `map_reads` (!108).
+
+### Changed
+- Optimized localization for `build_pangenome` by making `localize_nodes()` parallel;
+  this code was sanity-checked with two small yeast datasets (!95).
+- `remove_phenotype` was renamed to `remove_phenotypes` to be consistent with `add_phenotypes` (!111).
+
+### Fixed
+- `group_info` now retrieves the correct homology groups with -H/-G (!110).
+- Resolved an issue where `add_phenotypes` incorrectly binned columns when not every value was numeric (!111).
+
 ## [4.0.0] - 21-12-2022
 
 ### Added
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 2c71b8e283a0486caef75744a35b8c9b20a7bee8..88b629db04edfa78a709ae23610e9ebe5372c4b1 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -10,8 +10,8 @@ project = 'PanTools'
 copyright = '2016, the PanTools team'
 author = 'Sandra Smit'
 
-release = '4.0.0'
-version = '4.0.0'
+release = '4.1.0'
+version = '4.1.0'
 
 # -- General configuration
 
@@ -73,4 +73,5 @@ rst_prolog = """
 rst_epilog = """
 .. |ProjectVersion| replace:: {version}
 .. |PantoolsGit| replace:: https://git.wur.nl/bioinformatics/pantools/-/tree/v{version}
-""".format(version = version,)
\ No newline at end of file
+""".format(version = version,)
+
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 5fa335077940380bf21edffd08d0db76a47ed75d..65313a9bded0fbc4568274f80d6272ab51340aee 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -19,7 +19,8 @@ Publications
    data.
    <https://academic.oup.com/bioinformatics/article/32/17/i487/2450785>`_
 -  `Efficient inference of homologs in large eukaryotic pan-proteomes
-   <https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2362-4>`_
+   <https://bmcbioinformatics.biomedcentral.com/articles/10.1186/
+   s12859-018-2362-4>`_
 -  `Pan-genomic read mapping
    <https://www.biorxiv.org/content/10.1101/813634v1>`_
 -  `The Pectobacterium pangenome, with a focus on Pectobacterium
@@ -125,4 +126,4 @@ Contents
    Tutorial 2 - Construct pangenome <tutorial/tutorial_part2>
    Tutorial 3 - Neo4j browser <tutorial/tutorial_part3>
    Tutorial 4 - Characterization <tutorial/tutorial_part4>
-   Tutorial 5 - Phylogeny <tutorial/tutorial_part5>
\ No newline at end of file
+   Tutorial 5 - Phylogeny <tutorial/tutorial_part5>
diff --git a/docs/source/tutorial/tutorial_part1.rst b/docs/source/tutorial/tutorial_part1.rst
index dbbe86cea12ee5d36b87dc756775d7f7d44e226f..bf8c40f74f1233e625afe6e5c62e3ce9c60fa214 100644
--- a/docs/source/tutorial/tutorial_part1.rst
+++ b/docs/source/tutorial/tutorial_part1.rst
@@ -2,4 +2,4 @@ Part 1. Install PanTools
 ========================
 
 For instructions on how to install PanTools,
-see :doc:`/user_guide/install`.
\ No newline at end of file
+see :doc:`/user_guide/install`.
diff --git a/docs/source/tutorial/tutorial_part2.rst b/docs/source/tutorial/tutorial_part2.rst
index d09d702b0646731fd794687872b67ab0b905ef8e..bc14866fa85d4eeadaa132e0f07a2abf6e46d1ef 100644
--- a/docs/source/tutorial/tutorial_part2.rst
+++ b/docs/source/tutorial/tutorial_part2.rst
@@ -16,13 +16,13 @@ or via wget.
 .. code:: bash
 
    $ wget http://bioinformatics.nl/pangenomics/tutorial/chloroplasts.tar.gz
-   $ tar -xvzf chloroplasts.tar.gz #unpack the archive 
+   $ tar -xvzf chloroplasts.tar.gz #unpack the archive
 
 We assume a PanTools alias was set during the
-:ref:`installation <user_guide/install:set pantools alias>`. This allows PanTools
-to be executed with ``pantools`` rather than the full path to the jar file.
-If you don’t have an alias, either set one or replace the pantools command
-with the full path to the .jar file in the tutorials.
+:ref:`installation <user_guide/install:set pantools alias>`. This allows
+PanTools to be executed with ``pantools`` rather than the full path to the jar
+file. If you don’t have an alias, either set one or replace the pantools
+command with the full path to the .jar file in the tutorials.
 
 --------------
 
@@ -42,8 +42,8 @@ the following lines:
    YOUR_PATH/S_tuberosum.fasta
 
 Make sure that ‘*YOUR_PATH*’ is the full path to the input files! Then
-run PanTools with the :ref:`build_pangenome <user_guide/construct:build pangenome>`
-function and include the text file
+run PanTools with the :ref:`build_pangenome <user_guide/construct:build
+pangenome>` function and include the text file
 
 .. code:: bash
 
@@ -87,9 +87,8 @@ file:
    4 YOUR_PATH/S_tuberosum.gff3
    5 YOUR_PATH/Z_mays.gff3
 
-Run PanTools using the
-:ref:`add_annotations <user_guide/construct:add annotations>` function and include
-the new text file
+Run PanTools using the :ref:`add_annotations <user_guide/construct:add
+annotations>` function and include the new text file
 
 .. code:: bash
 
@@ -152,12 +151,12 @@ and separate them by a single space
    1 1 200 500
    2 1 300 700
    3 1 1 10000
-   3 1 1 10000 - 
+   3 1 1 10000 -
    4 1 9999 15000
    5 1 100000 110000
 
-Now run the :ref:`retrieve_regions <user_guide/explore:retrieve regions>` function
-and include the new text file
+Now run the :ref:`retrieve_regions <user_guide/explore:retrieve regions>`
+function and include the new text file
 
 .. code:: bash
 
diff --git a/docs/source/tutorial/tutorial_part3.rst b/docs/source/tutorial/tutorial_part3.rst
index a2261a7d9285a94facf4a6c2a0f9ac35bcf40f7f..93949bde0d367c790d43df68222521b1f098a242 100644
--- a/docs/source/tutorial/tutorial_part3.rst
+++ b/docs/source/tutorial/tutorial_part3.rst
@@ -39,10 +39,10 @@ a single uncommented line with 'dbms.directories.data'.
 
 .. code:: text
 
-   #dbms.connectors.default_listen_address=0.0.0.0             
-   #dbms.connector.bolt.listen_address=:7687               
-   #dbms.connector.http.listen_address=:7474               
-   #dbms.connector.https.listen_address=:7473              
+   #dbms.connectors.default_listen_address=0.0.0.0
+   #dbms.connector.bolt.listen_address=:7687
+   #dbms.connector.http.listen_address=:7474
+   #dbms.connector.https.listen_address=:7473
 
 Let's start up the Neo4j server!
 
@@ -74,8 +74,8 @@ Exploring nodes and edges in Neo4j
 Go through the following steps to become proficient in using the Neo4j
 browser and the underlying PanTools data structure. If you have any
 difficulty finding a node, relationship or any type of
-information, download and use `this visual
-guide <http://www.bioinformatics.nl/pangenomics/tutorial/neo4j_browser.tar.gz>`_.
+information, download and use `this visual guide
+<http://www.bioinformatics.nl/pangenomics/tutorial/neo4j_browser.tar.gz>`_.
 
 1.  Click on the database icon on the left. A menu with all node types
     and relationship types will appear.
diff --git a/docs/source/tutorial/tutorial_part4.rst b/docs/source/tutorial/tutorial_part4.rst
index ebd9210dc0671219ca353a57d739b5b7a20cebfc..da26dcc6bce6e6e211878d19bc0afa552f3cb49b 100644
--- a/docs/source/tutorial/tutorial_part4.rst
+++ b/docs/source/tutorial/tutorial_part4.rst
@@ -16,10 +16,10 @@ the following commands.
    $ mafft -h
 
 We assume a PanTools alias was set during the
-:ref:`installation <user_guide/install:set pantools alias>`. This allows PanTools
-to be executed with ``pantools`` rather than the full path to the jar file.
-If you don’t have an alias, either set one or replace the pantools command with
-the full path to the .jar file in the tutorials.
+:ref:`installation <user_guide/install:set pantools alias>`. This allows
+PanTools to be executed with ``pantools`` rather than the full path to the jar
+file. If you don’t have an alias, either set one or replace the pantools
+command with the full path to the .jar file in the tutorials.
 
 Input data
 ----------
@@ -39,7 +39,7 @@ Option 1: Download separate genome and annotation files
 .. code:: bash
 
    $ wget http://bioinformatics.nl/pangenomics/tutorial/pecto_dickeya_input.tar.gz
-   $ tar -xvzf pecto_dickeya_input.tar.gz 
+   $ tar -xvzf pecto_dickeya_input.tar.gz
    $ gzip -d pecto_dickeya_input/annotations/*
    $ gzip -d pecto_dickeya_input/genomes/*
    $ gzip -d pecto_dickeya_input/functions/*
@@ -91,8 +91,8 @@ Metrics and general statistics
 ------------------------------
 
 After building or uncompressing the pangenome, run the
-:ref:`metrics <user_guide/characterize:metrics>` functionality to produce various
-statistics that should verify an errorless construction.
+:ref:`metrics <user_guide/characterize:metrics>` functionality to produce
+various statistics that help verify the construction completed without errors.
 
 .. code:: bash
 
@@ -111,9 +111,9 @@ coding sequences are not interrupted.
 Gene classification
 -------------------
 
-With the :ref:`gene_classification <user_guide/characterize:gene classification>`
-functionality you are able to organize the gene repertoire into the
-core, accessory or unique part of the pangenome.
+With the :ref:`gene_classification <user_guide/characterize:gene
+classification>` functionality you are able to organize the gene repertoire
+into the core, accessory or unique part of the pangenome.
 
 -  **Core**, a gene is present in all genomes
 -  **Unique**, a gene is present in a single genome
@@ -192,8 +192,8 @@ genomes of a certain phenotype have additional gene copies to (at least
 one of) the other phenotypes.
 
 Each time you run the
-:ref:`gene_classification <user_guide/characterize:gene classification>` function,
-multiple files are created that contain node identifiers of a certain
+:ref:`gene_classification <user_guide/characterize:gene classification>`
+function, multiple files are created that contain node identifiers of a certain
 homology group category. These files can be given to other PanTools
 functions for a downstream analysis, for example, sequence alignment,
 phylogeny, or GO enrichment. We will use one of the files later in this
@@ -206,9 +206,9 @@ Pangenome structure
 
 With the previous functionality we identified the core, accessory and
 unique parts of the pangenome. Now we will use the
-:ref:`pangenome_size_genes <user_guide/characterize:pangenome structure>` function to
-observe how these numbers are reached by simulating the growth of the
-pangenome. Simulating the growth helps explaining if a pangenome should
+:ref:`pangenome_size_genes <user_guide/characterize:pangenome structure>`
+function to observe how these numbers are reached by simulating the growth of
+the pangenome. Simulating the growth helps explain whether a pangenome should
 be considered open or closed. A pangenome is called open as long as a
 significant number of new (unique) genes are added to the total gene
 repertoire. The openness of a pangenome is usually tested using Heap’s
@@ -237,7 +237,7 @@ look at the files.
 .. code:: bash
 
    $ wget http://bioinformatics.nl/pangenomics/tutorial/pectobacterium_structure.tar.gz
-   $ tar -xvf pectobacterium_structure.tar.gz 
+   $ tar -xvf pectobacterium_structure.tar.gz
 
 Normally you still have to run the R scripts to create the output figures
 and determine the openness of the pangenome.
@@ -285,8 +285,8 @@ Functional annotations
 PanTools is able to incorporate functional annotations into the
 pangenome by reading output of various functional annotation tools. In
 this tutorial we only include annotations from InterProScan. Please see
-the :ref:`add_functions <user_guide/construct:add functional annotations>` manual
-to check which other tools are available. To include the annotations,
+the :ref:`add_functions <user_guide/construct:add functional annotations>`
+manual to check which other tools are available. To include the annotations,
 create a file **functions.txt** using text from the box below and add it
 to the command line argument.
 
@@ -457,7 +457,8 @@ the pangenome.
    :ref:`kmer_classification <user_guide/characterize:k-mer classification>`.
 -  Find co-localized genes in a set of homology groups:
    :ref:`locate_genes <user_guide/explore:locate genes>`.
--  Mapping short reads against the pangenome with :doc:`map </user_guide/mapping>`.
+-  Mapping short reads against the pangenome with :doc:`map
+   </user_guide/mapping>`.
 
 In :doc:`part 5 <tutorial_part5>` of the tutorial we explore some of the
 phylogenetic methods implemented in PanTools.
diff --git a/docs/source/tutorial/tutorial_part5.rst b/docs/source/tutorial/tutorial_part5.rst
index 78eeb18e4b309e2684f3a277aff8404c6a09c64e..0d2f929a99698594537b0aca3e709df89162dfc9 100644
--- a/docs/source/tutorial/tutorial_part5.rst
+++ b/docs/source/tutorial/tutorial_part5.rst
@@ -24,7 +24,7 @@ pre-constructed pangenome
 .. code:: bash
 
    $ wget http://bioinformatics.nl/pangenomics/tutorial/pecto_dickeya_DB.tar.gz
-   $ tar -xvzf pecto_dickeya_DB.tar.gz 
+   $ tar -xvzf pecto_dickeya_DB.tar.gz
 
 --------------
 
@@ -42,13 +42,13 @@ using :ref:`add_phenotypes <user_guide/construct:add phenotypes>`.
 
 .. code:: text
 
-   Genome, species, strain_name, low_temperature   
+   Genome, species, strain_name, low_temperature
    1,P. odoriferum,P. odoriferum Q166, false
    2,P. fontis, P. fontis M022, true
    3,P. polaris,P. polaris S4.16.03.2B, false
    4,P. brasiliense, P. brasiliense S2, true
    5,P. brasiliense, P. brasiliense Y49, false
-   6,D. dadantii, D. dadantii 3937,?  
+   6,D. dadantii, D. dadantii 3937,?
 
 .. code:: bash
 
@@ -133,7 +133,7 @@ simply execute the Rscript that was created by
 
 .. code:: bash
 
-   $ Rscript pecto_dickeya_DB/gene_classification/gene_distance_tree.R 
+   $ Rscript pecto_dickeya_DB/gene_classification/gene_distance_tree.R
 
 The resulting tree is called **gene_distance.tree**.
 
@@ -141,14 +141,15 @@ K-mer distance tree
 ~~~~~~~~~~~~~~~~~~~
 
 To obtain a k-mer distance phylogeny, the k-mers must first be counted
-with the :ref:`kmer_classification <user_guide/characterize:k-mer classification>`
+with the :ref:`kmer_classification <user_guide/characterize:k-mer
+classification>`
 function. Afterwards, the tree can be constructed by executing the
 Rscript.
 
 .. code:: bash
 
    $ pantools kmer_classification pecto_dickeya_DB
-   $ Rscript pecto_dickeya_DB/kmer_classification/genome_kmer_distance_tree.R 
+   $ Rscript pecto_dickeya_DB/kmer_classification/genome_kmer_distance_tree.R
 
 The resulting tree is written to **genome_kmer_distance.tree**.
 
@@ -168,8 +169,8 @@ names as well, because we included this as a phenotype during
 
 Let’s include the strain identifiers to the core snp tree to make the
 final figure more informative. Use the
-:ref:`rename_phylogeny <user_guide/phylogeny:rename phylogeny>` function to rename
-the tree nodes.
+:ref:`rename_phylogeny <user_guide/phylogeny:rename phylogeny>` function to
+rename the tree nodes.
 
 .. code:: bash
 
@@ -231,8 +232,8 @@ Create iTOL templates
 
 In iTOL it is possible to add colors to the tree by coloring the
 terminal nodes or adding an outer ring. The PanTools function
-:ref:`create_tree_template <user_guide/phylogeny:create tree template>` is able to
-create templates that allows for easy coloring (with maximum of 20
+:ref:`create_tree_template <user_guide/phylogeny:create tree template>` is able
+to create templates that allow for easy coloring (with a maximum of 20
 possible colors). If the function is run without any additional
 argument, templates are created for trees that only contain genome
 numbers (e.g. k-mer distance tree). Here we want to color the (renamed)
diff --git a/docs/source/user_guide/characterize.rst b/docs/source/user_guide/characterize.rst
index 89d579011af16aac4d07df92daa5a2087c06a726..031c7a15c8d454537e4089393c50afa2e4287238 100644
--- a/docs/source/user_guide/characterize.rst
+++ b/docs/source/user_guide/characterize.rst
@@ -363,8 +363,8 @@ newly discovered genes, *N* is the total number of genomes, and *k* and
 
 Pangenome size estimation is based on homology groups. This function
 requires the sequences to be already clustered by
-:ref:`group <user_guide/construct:group>`. The same simulation can be performed on
-*k*-mer sequences instead of homology groups with ``--kmer``. As the number
+:ref:`group <user_guide/construct:group>`. The same simulation can be performed
+on *k*-mer sequences instead of homology groups with ``--kmer``. As the number
 of *k*-mers is significantly higher than the number of homology groups, the
 runtime is much longer and the (default) number of loops is set to only 100.
 
@@ -405,7 +405,7 @@ Example commands
    $ pantools pangenome_structure --loops=1000 --exclude=1-3,5  tomato_DB
    $ pantools pangenome_structure --kmer tomato_db
 
-   $ R script pangenome_growth.R 
+   $ Rscript pangenome_growth.R
    $ Rscript gains_losses_median_or_average.R
    $ Rscript gains_losses_median_and_average.R
    $ Rscript heaps_law.R
@@ -776,7 +776,7 @@ Parameters
 Options
 """""""
 
-Requires **one** of ``--homology-file``|``--nodes``.
+Requires **one** of ``--homology-file``\|\ ``--nodes``.
 
 .. list-table::
    :widths: 30 70
diff --git a/docs/source/user_guide/construct.rst b/docs/source/user_guide/construct.rst
index e383b11707eb1ed9cecd1267fbf2baef16bfed7d..060f7c36a2b7ac81d48c90d0b6645d0cd87cc9ad 100644
--- a/docs/source/user_guide/construct.rst
+++ b/docs/source/user_guide/construct.rst
@@ -4,12 +4,41 @@ Construct a pangenome
 Build pangenome
 ---------------
 
-Build a pangenome out of a set of genomes.
+Build a pangenome out of a set of genomes. The construction consists of two
+steps: laying out the structure of the de Bruijn graph, and adding localization
+information to the graph.
+
+Optimized localization
+~~~~~~~~~~~~~~~~~~~~~~~~~
+The localization step of ``build_pangenome`` has been parallelized to increase
+performance. The level of parallelism is controlled by the ``--threads`` option
+(see below). Sequence nodes are localized in parallel, and updates to the
+localization database are cached to disk.
+
+Localization updates are then sorted into a number of files, called *buckets*,
+whose contents are written to Neo4j in parallel by multiple database writer
+threads (see the ``--num-db-writer-threads`` option below). Because each
+database writer thread reads the contents of only a single bucket into memory
+at a time, memory usage is reduced.
+
+To cache localization updates on disk, PanTools needs a *scratch directory* for
+temporary storage. This directory is created by PanTools automatically, or can
+be set explicitly using the ``--scratch-directory`` option.
+
+Lastly, an in-memory cache has been introduced to store frequently accessed
+properties of nucleotide (sequence) nodes. The cache automatically retains the
+most-frequently used properties and evicts the least-frequently used ones. This
+significantly increases performance by reducing Neo4j IO. The size of the cache
+can be controlled with the ``--cache-size`` option. To calculate the heap space
+the cache will occupy, multiply the maximum size of the cache by 128 bytes;
+e.g., for the default cache size of 10,000,000 PanTools will need an additional
+10,000,000 * 128 B = 1.28 GB of heap space.
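+
+As a minimal sketch, the options described above (and documented in the options
+table below) can be combined like this; the database path is a placeholder, and
+other required arguments such as the genomes file are omitted:
+
+.. code:: bash
+
+   $ pantools build_pangenome \
+       --threads=8 \
+       --scratch-directory=YOUR_PATH/scratch \
+       --num-buckets=200 \
+       --num-db-writer-threads=2 \
+       --transaction-size=10000 \
+       --cache-size=10000000 \
+       YOUR_PATH/pangenome_DB
+
+The values shown are the documented defaults; in practice you would only set
+the options you want to change.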
 
 Required software
 ~~~~~~~~~~~~~~~~~
 
-`KMC 2.3 or 3.0 <http://sun.aei.polsl.pl/REFRESH/index.php?page=projects&project=kmc&subpage=about>`_
+`KMC 2.3 or 3.0 <http://sun.aei.polsl.pl/REFRESH/index.php?page=projects&
+project=kmc&subpage=about>`_
 
 Parameters
 ~~~~~~~~~~
@@ -32,6 +61,65 @@ Options
    * - ``--kmer-size``
      - Size of k-mers. Should be in range [6..255]. By not giving this
        argument, the most optimal k-mer size is calculated automatically.
+   * - ``--threads``/``-t``
+     - Number of parallel working threads, default is the number of cores or 8,
+       whichever is lower.
+   * - ``--scratch-directory``
+     - Temporary directory for storing localization update files. If not set,
+       a temporary directory will be created inside the default temporary-file
+       directory. On most Linux distributions this default temporary-file
+       directory will be ``/tmp``, on macOS typically ``/var/folders/``.
+
+       If a scratch directory is set, it will be created if it does not exist.
+       If it does exist, PanTools will verify the directory is empty and, if
+       not, raise an exception.
+   * - ``--num-buckets``
+     - Number of buckets for sorting, default is 200. During the localization
+       phase, updates are cached to disk and sorted into a number of files
+       called buckets. This reduces the memory usage of storing all
+       localization updates: instead of keeping them all in memory, buckets
+       are read with a given level of parallelism (see the
+       ``--num-db-writer-threads`` option) and Neo4j is updated with each
+       bucket's contents.
+
+       The more buckets are available, the lower the memory usage. **However,
+       please make sure PanTools can keep a file open for each bucket during
+       localization by setting the file-descriptor limit to an appropriate
+       value.** For the default of 200 buckets, we advise setting the limit to
+       1024, like so: ``ulimit -n 1024``. For a larger number of buckets, set
+       the limit to around 1,000 plus the number of buckets.
+   * - ``--transaction-size``
+     - Number of localization updates to pack into a single Neo4j transaction,
+       default is 10,000. To increase throughput to Neo4j, localization updates
+       are batched into transactions. The greater the number of updates per
+       transaction, the higher the throughput (up to a point), but also the
+       higher the memory usage.
+
+       In our experiments we have found 10,000 to provide a good balance
+       between memory usage and performance.
+   * - ``--num-db-writer-threads``
+     - Number of threads to use for writing to Neo4j, default is 2. After
+       sorting localization updates into buckets (see the ``--num-buckets``
+       option), buckets are read in parallel by the specified number of
+       Neo4j database writer threads. With the default of two threads, the
+       contents of two buckets will be kept in memory at the same time, and
+       written to Neo4j with a given transaction size (see the
+       ``--transaction-size`` option).
+
+       In our experiments on SSD and network-backed storage we saw little
+       additional increase in performance by using more than two threads.
+   * - ``--cache-size``
+     - Maximum number of items in the node property cache, default is
+       10,000,000. During localization several properties of nucleotide
+       (sequence) nodes are accessed frequently. To avoid loading these from
+       Neo4j on every access, the specified number of most-frequently used
+       items is cached. The cache can be disabled entirely by setting the
+       cache size to zero.
+   * - ``--keep-intermediate-files``
+     - Do not delete intermediate localization files after the command
+       finishes. Disabled by default, i.e., intermediate files are deleted
+       automatically.
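+
+As a rough sizing sketch based on the rules of thumb above (the bucket and
+cache numbers are illustrative):
+
+.. code:: bash
+
+   # File descriptor limit: around 1,000 plus the number of buckets.
+   $ NUM_BUCKETS=500
+   $ ulimit -n $((1000 + NUM_BUCKETS))   # raises the limit to 1500
+
+   # Extra heap needed for the node property cache: cache size * 128 bytes.
+   $ CACHE_SIZE=10000000
+   $ echo $((CACHE_SIZE * 128))          # 1280000000 B, i.e. 1.28 GB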
 
 Example genomes file
 ~~~~~~~~~~~~~~~~~~~~
@@ -79,7 +167,8 @@ put this annotation in the graph database, it can be that the features
 are not correctly added. This is especially true for non-standard GFF
 files and annotated organellar genomes. If you encounter problems with
 a gff file, please check whether it is valid to the
-`GFF3 specification <https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md>`_.
+`GFF3 specification <https://github.com/The-Sequence-Ontology/Specifications/
+blob/master/gff3.md>`_.
 Also, our code should be able to handle all valid GFF3 files, but
 if the GFF3 file contains a trans-spliced gene that has alternative
 splicing, it will not be able to handle it (it will only annotate one
@@ -138,7 +227,7 @@ the file that you used to construct the pangenome.
    2 /use_the/genome2.gff
    3 /full_path/genome3.gff
 
-|  \ **GFF3 file format**\ 
+|  \ **GFF3 file format**\
 | The GFF format consists of one line per feature, each containing 9
   columns of data, plus optional track definition lines, that must be
   tab separated. Please use the proper hierarchy for the feature:
@@ -308,7 +397,8 @@ Relevant literature
 """""""""""""""""""
 
 -  `Efficient inference of homologs in large eukaryotic pan-proteomes
-   <https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2362-4>`_
+   <https://bmcbioinformatics.biomedcentral.com/articles/10.1186/
+   s12859-018-2362-4>`_
 
 --------------
 
@@ -367,9 +457,9 @@ that are present in all genomes are used to validate each setting.
 
 No grouping is active after running this function. Use the generated
 output files to identify a suitable grouping. Activate this grouping
-using :ref:`change_grouping <user_guide/construct:change grouping>`. An overview of
-the available groupings and used settings is stored in the 'pangenome'
-node (inside the database), or can be created by running
+using :ref:`change_grouping <user_guide/construct:change grouping>`. An
+overview of the available groupings and used settings is stored in the
+'pangenome' node (inside the database), or can be created by running
 :ref:`grouping_overview <user_guide/characterize:grouping overview>`.
 
 Required software
@@ -387,8 +477,9 @@ Parameters
      - Path to the database root directory.
    * - <buscoDirectory>
      - The output directory created by the
-       :ref:`busco_protein <user_guide/construct:busco protein>` function. This directory is
-       found **inside** the pangenome database, in the *busco* directory.
+       :ref:`busco_protein <user_guide/construct:busco protein>` function. This
+       directory is found **inside** the pangenome database, in the *busco*
+       directory.
 
 Options
 """""""
@@ -458,7 +549,8 @@ database.
    :width: 300
    :align: center
 
-   *Example output of optimal_grouping.R. The number of FN and FP for all eight relaxation settings.*
+   *Example output of optimal_grouping.R. The number of FN and FP for all eight
+   relaxation settings.*
 
 --------------
 
@@ -503,9 +595,9 @@ Build panproteome
 
 Build a panproteome out of a set of proteins. By only including protein
 sequences, the usable functionalities are limited to a protein-based
-analysis, please see :doc:`differences pangenome and panproteome </user_guide/differences>`.
-No additional proteins can be added to the panproteome, it needs to be
-rebuilt completely.
+analysis; please see :doc:`differences pangenome and panproteome
+</user_guide/differences>`. No additional proteins can be added to the
+panproteome; it needs to be rebuilt completely.
 
 Parameters
 ~~~~~~~~~~
@@ -545,7 +637,8 @@ Add additional genomes to an existing pangenome.
 Required software
 ~~~~~~~~~~~~~~~~~
 
-`KMC 2.3 or 3.0 <http://sun.aei.polsl.pl/REFRESH/index.php?page=projects&project=kmc&subpage=about>`_
+`KMC 2.3 or 3.0 <http://sun.aei.polsl.pl/REFRESH/index.php?page=projects&
+project=kmc&subpage=about>`_
 
 Parameters
 ~~~~~~~~~~
@@ -625,6 +718,15 @@ Options
 .. list-table::
    :widths: 30 70
 
+   * - ``--scratch-directory``
+     - Temporary directory for storing localization update files. If not set,
+       a temporary directory will be created inside the default temporary-file
+       directory. On most Linux distributions this default temporary-file
+       directory will be ``/tmp``, on MacOS typically ``/var/folders/``.
+
+       If a scratch directory is set, it will be created if it does not exist.
+       If it does exist, PanTools will verify the directory is empty and, if
+       not, raise an exception.
    * - ``--append``
      - Do not remove existing phenotype nodes but only add new
        properties to them. If a property already exists, values from
@@ -945,7 +1047,7 @@ InterProScan gff file:
 .. code:: text
 
    ##gff-version 3
-   ##interproscan-version 5.52-86.0 
+   ##interproscan-version 5.52-86.0
    AT4G21230.1   ProSiteProfiles protein_match 333 620 39.000664   +   .   date=06-10-2021;Target=mRNA.AT4G21230.1 333 620;Ontology_term="GO:0004672","GO:0005524","GO:0006468";ID=match$42_333_620;signature_desc=Protein kinase domain profile.;Name=PS50011;status=T;Dbxref="InterPro:IPR000719"
    AT3G08980.5   TIGRFAM protein_match         25  101 3.7E-14     +   .   date=06-10-2021;Target=mRNA.AT3G08980.5 25 101;Ontology_term="GO:0006508","GO:0008236","GO:0016020";ID=match$66_25_101;signature_desc=sigpep_I_bact: signal peptidase I;Name=TIGR02227;status=T;Dbxref="InterPro:IPR000223"
    AT2G17780.2   Phobius protein_match         338 354 .           +   .   date=06-10-2021;Target=AT2G17780.2 338 354;ID=match$141_338_354;signature_desc=Region of a membrane-bound protein predicted to be embedded in the membrane.;Name=TRANSMEMBRANE;status=T
@@ -1041,7 +1143,8 @@ Relevant literature
    <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3531188/>`_
 -  `A Combined Transmembrane Topology and Signal Peptide Prediction
    Method
-   <https://www.sciencedirect.com/science/article/abs/pii/S0022283604002943?via%3Dihub>`_
+   <https://www.sciencedirect.com/science/article/abs/pii/S0022283604002943?
+   via%3Dihub>`_
 -  `Expanded microbial genome coverage and improved protein family
    annotation in the COG database
    <https://academic.oup.com/nar/article/43/D1/D261/2439462>`_
@@ -1212,9 +1315,9 @@ Example commands
 
 .. code:: bash
 
-   $ pantools remove_phenotype tomato_DB
-   $ pantools remove_phenotype --phenotype=color tomato_DB
-   $ pantools remove_phenotype --phenotype=color --exclude=11,12 tomato_DB
+   $ pantools remove_phenotypes tomato_DB
+   $ pantools remove_phenotypes --phenotype=color tomato_DB
+   $ pantools remove_phenotypes --phenotype=color --exclude=11,12 tomato_DB
 
 --------------
 
diff --git a/docs/source/user_guide/explore.rst b/docs/source/user_guide/explore.rst
index 6c28e281a741a6fbb63df8a5f4f40c5cb8dafd13..a0d92cfacc75dd30787e783b430a4db0abc369bf 100644
--- a/docs/source/user_guide/explore.rst
+++ b/docs/source/user_guide/explore.rst
@@ -125,7 +125,7 @@ annotated by a method that follows the rules for genetic nomenclature.
 Gene naming can be inconsistent when different tools are used for genome
 annotation, making this functionality ineffective.
 
-This function is the same as 
+This function is the same as
 :ref:`mlsa_find_genes <user_guide/phylogeny:step 1 search for genes>` but
 uses a different output directory. Several warnings (shown in the other
 manual) can be generated during the search. These warnings are less
@@ -269,8 +269,8 @@ positions that are separated by a space.
 .. code:: text
 
    195 1 477722 478426
-   71 10 17346 18056 
-   138 47 159593 160300 
+   71 10 17346 18056
+   138 47 159593 160300
 
 Example commands
 """"""""""""""""
@@ -297,7 +297,7 @@ Functional annotations
 ----------------------
 
 The following functions can only be used when any type of functional
-annotation is 
+annotation is
 :ref:`added to the database <user_guide/construct:add functional annotations>`.
 
 Show GO
 Several functions generate tables in a CSV file format that
 the following functions can work with. For example, ANI scores, *k*-mer
 and gene distance used for constructing the Neighbour Joining
 :ref:`phylogenetic trees <user_guide/phylogeny:phylogeny>`, and the identity and
-protein sequence similarity tables created by the 
+protein sequence similarity tables created by the
 :ref:`alignment functions <user_guide/msa:msa>`.
 
 Order matrix
@@ -639,7 +639,7 @@ To extract:
 .. code:: text
 
    1
-   1 1 
+   1 1
    1 1 1 10000
    1 1 1000 1500 -
    195 1 477722 478426
@@ -673,7 +673,8 @@ Options
    :widths: 30 70
 
    * - ``--feature-type``
-     - Required. The feature name; for example 'gene', 'mRNA', 'exon', 'tRNA', etc.
+     - Required. The feature name; for example 'gene', 'mRNA', 'exon', 'tRNA',
+       etc.
    * - ``--include``/``-i``
      - Only include a selection of genomes.
    * - ``--exclude``/``-e``
diff --git a/docs/source/user_guide/install.rst b/docs/source/user_guide/install.rst
index ffe82a27dc693c18290651bbd7adffb2249e5cdf..7b0b68693bb48dd37434fd0c807891f9d50ef9f0 100644
--- a/docs/source/user_guide/install.rst
+++ b/docs/source/user_guide/install.rst
@@ -3,7 +3,8 @@ Installing and configuring the required software
 
 1. :ref:`user_guide/install:download pantools`
 2. :ref:`user_guide/install:install neo4j`
-3. :ref:`Install dependencies, either manually or through conda. <user_guide/install:dependencies>`
+3. :ref:`Install dependencies, either manually or through conda.
+   <user_guide/install:dependencies>`
 
 For PanTools developers:
 
@@ -76,15 +77,16 @@ all earlier PanTools versions.
 Download the Neo4j 3.5.30 community edition from the `Neo4j
 website <https://neo4j.com/download-center/>`_ or download the binaries
 directly from our
-`server <http://www.bioinformatics.nl/pangenomics/tutorial/neo4j-community-3.5.30-unix.tar.gz>`_.
+`server <http://www.bioinformatics.nl/pangenomics/tutorial/
+neo4j-community-3.5.30-unix.tar.gz>`_.
 
 .. code:: bash
 
    $ wget http://www.bioinformatics.nl/pangenomics/tutorial/neo4j-community-3.5.30-unix.tar.gz
    $ tar -xvzf neo4j-community-*
 
-   # Edit your ~/.bashrc to include Neo4j to your $PATH 
-   $ echo "export PATH=/YOUR_PATH/neo4j-community-3.5.30/bin:\$PATH" >> ~/.bashrc #replace YOUR_PATH with the correct path on your computer 
+   # Edit your ~/.bashrc to include Neo4j to your $PATH
+   $ echo "export PATH=/YOUR_PATH/neo4j-community-3.5.30/bin:\$PATH" >> ~/.bashrc #replace YOUR_PATH with the correct path on your computer
    $ source ~/.bashrc
    $ neo4j status # test if Neo4j is executable
 
@@ -114,9 +116,9 @@ dependency solving.
 To install all dependencies into a separate environment, run the
 following commands. Please choose the conda_linux.yml or
 conda_macos.yml file depending on your operating system. These files
-be found in the `release <https://git.wur.nl/bioinformatics/pantools/-/releases>`_. The
-difference between the two files is that the linux file contains
-BUSCO v5.2.2, which is not compatible with the other dependencies on
+can be found in the `release <https://git.wur.nl/bioinformatics/pantools/-/
+releases>`_. The difference between the two files is that the Linux file
+contains BUSCO v5.2.2, which is not compatible with the other dependencies on
 macOS.
 
 .. code:: bash
@@ -163,7 +165,7 @@ downloaded from https://github.com/refresh-bio/KMC/releases.
    $ echo "export PATH=/YOUR_PATH/KMC/:\$PATH" >> ~/.bashrc #replace YOUR_PATH with the correct path on your computer
    $ source ~/.bashrc
    $ kmc # test if KMC is executable
-   $ kmc_tools # test if kmc_tools is executable 
+   $ kmc_tools # test if kmc_tools is executable
 
 --------------
 
@@ -176,7 +178,7 @@ https://micans.org/mcl under License & software.
 
 .. code:: bash
 
-   $ wget https://micans.org/mcl/src/mcl-14-137.tar.gz 
+   $ wget https://micans.org/mcl/src/mcl-14-137.tar.gz
    $ tar -xvzf mcl-*
    $ cd mcl-14-137
    $ ./configure --prefix=/YOUR_PATH/mcl-14-137/shared #replace YOUR_PATH with the correct path on your computer
@@ -210,8 +212,8 @@ found on the FastTree website: http://www.microbesonline.org/fasttree/.
 .. code:: bash
 
    $ wget http://www.microbesonline.org/fasttree/FastTree
-   $ chmod +x FastTree 
-   $ ./FastTree # test if FastTree is executable 
+   $ chmod +x FastTree
+   $ ./FastTree # test if FastTree is executable
 
    # Edit your ~/.bashrc to include FastTree to your PATH
    $ echo "export PATH=/YOUR_PATH:\$PATH" >> ~/.bashrc #replace YOUR_PATH with the correct path on your computer
@@ -234,11 +236,11 @@ following steps.
    mkdir R
    mkdir R/R_LIBS
    cd R
-   wget https://cran.r-project.org/src/base/R-4/R-4.0.2.tar.gz #version number might have changed already  
+   wget https://cran.r-project.org/src/base/R-4/R-4.0.2.tar.gz #version number might have changed already
    tar -xvf R-4.0.2.tar.gz
    cd R-4.0.2/
    ./configure --prefix=/YOUR_PATH/R/  #replace YOUR_PATH with the correct path on your computer
-   make 
+   make
 
    # Edit your ~/.bashrc to include R to your PATH
    $ echo "export PATH=/YOUR_PATH/R/bin/:\$PATH" >> ~/.bashrc #replace YOUR_PATH with the correct path on your computer
@@ -308,9 +310,9 @@ is available at https://github.com/ParBLiSS/FastANI/. The manual for
 
 .. code:: bash
 
-   $ wget https://github.com/marbl/Mash/releases/download/v2.2/mash-Linux64-v2.2.tar 
+   $ wget https://github.com/marbl/Mash/releases/download/v2.2/mash-Linux64-v2.2.tar
    $ tar -xvf mash-Linux64-v2.2.tar
-   $ mv mash-Linux64-v2.2/mash . 
+   $ mv mash-Linux64-v2.2/mash .
 
    $ wget https://github.com/ParBLiSS/FastANI/releases/download/v1.32/fastANI-Linux64-v1.32.zip
    $ unzip fastANI-Linux64-v1.32.zip
@@ -382,8 +384,8 @@ but it is not part of the standard set of predictions. To allow these
 predictions, https://phobius.sbc.su.se/, place the entire directory in
 the InterProScan/bin/ directory and edit the **interproscan.properties**
 configuration file. More information about including Phobius into the
-InterProScan analysis is found at
-https://interproscan-docs.readthedocs.io/en/latest/ActivatingLicensedAnalyses.html.
+InterProScan analysis can be found at
+`<https://interproscan-docs.readthedocs.io/en/latest/
+ActivatingLicensedAnalyses.html>`_.
 
 Install eggNOGmapper
 """"""""""""""""""""
diff --git a/docs/source/user_guide/mapping.rst b/docs/source/user_guide/mapping.rst
index c6e9dccfdbe6122ecd89845261aaa179252ddc3c..8bf6ea488d7cdda658f03af8e809c51244b75cfc 100644
--- a/docs/source/user_guide/mapping.rst
+++ b/docs/source/user_guide/mapping.rst
@@ -105,7 +105,7 @@ FASTQ file
    @SRR13153715.1 1/1
    TGGTCATACAGCAAAGCATAATTGTCACCATTACTATGGCAATCAAGCCAGCTATAAAACCTAGCCAAATGTACCATGGCCATTTTATATACTGCTCATACTTTCCAAGTTCTTGGAGATCGAT
    +
-   EEEEEEEEEEEEEEEAEEEE/EEEEE/AEEEEEEEEEEEEEE/EE/EEE/<EEEEEEE/EEEEEEEEEEEEEAEEEEEAEEEEEAEEAEEEEEEA<AAAEEAEEA<EE/EEEEAEAEA/EEAA/ 
+   EEEEEEEEEEEEEEEAEEEE/EEEEE/AEEEEEEEEEEEEEE/EE/EEE/<EEEEEEE/EEEEEEEEEEEEEAEEEEEAEEEEEAEEAEEEEEEA<AAAEEAEEA<EE/EEEEAEAEA/EEAA/
 
 Genome numbers file
 
diff --git a/docs/source/user_guide/msa.rst b/docs/source/user_guide/msa.rst
index c0b38b60422de81b90691c8a45a91774e1f6fc90..6b2bdb4aa0560ab277dc568b851b3caf71c685b4 100644
--- a/docs/source/user_guide/msa.rst
+++ b/docs/source/user_guide/msa.rst
@@ -100,10 +100,12 @@ Options
      - Exclude a selection of genomes.
    * - ``--homology-file``/``-H``
      - A text file with homology group node identifiers, separated by a comma.
-       Default is all homology groups. (Mutually exclusive with ``--homology-groups``.)
+       Default is all homology groups. (Mutually exclusive with
+       ``--homology-groups``.)
    * - ``--homology-groups``/``-G``
      - A comma separated list of homology group node identifiers.
-       Default is all homology groups. (Mutually exclusive with ``--homology-file``.)
+       Default is all homology groups. (Mutually exclusive with
+       ``--homology-file``.)
    * - ``--regions-file``/``-R``
      - A text file containing genome locations with on each line: a genome
        number, sequence number, begin and end position, separated by a space.
@@ -140,7 +142,7 @@ region to extract the reverse complement sequence.
 .. code:: text
 
    1 1 1 10000
-   195 1 477722 478426 
+   195 1 477722 478426
    71 10 17346 18056 -
    138 47 159593 160300 -
 
diff --git a/docs/source/user_guide/phylogeny.rst b/docs/source/user_guide/phylogeny.rst
index 19798d8d9e884a478b42855a3aa4ca88ae2e3a38..3d3d0e416d718858fce38e8a95a0d9eb5f93fdd1 100644
--- a/docs/source/user_guide/phylogeny.rst
+++ b/docs/source/user_guide/phylogeny.rst
@@ -78,13 +78,13 @@ Options
    * - ``--homology-file``/``-H``
      - A file with homology group node identifiers of single copy groups.
        Default is single_copy_orthologs.csv, generated in the previous
-       :ref:`gene_classification <user_guide/characterize:gene classification>` run.
-       (Mutually exclusive with ``--homology-groups``.)
+       :ref:`gene_classification <user_guide/characterize:gene classification>`
+       run. (Mutually exclusive with ``--homology-groups``.)
    * - ``--homology-groups``/``-G``
      - A comma separated list of homology group node identifiers of single
        copy groups. Default is single_copy_orthologs.csv, generated in
-       the previous :ref:`gene_classification <user_guide/characterize:gene classification>` run.
-       (Mutually exclusive with ``--homology-file``.)
+       the previous :ref:`gene_classification <user_guide/characterize:gene
+       classification>` run. (Mutually exclusive with ``--homology-file``.)
    * - ``--protein``
      - Use proteins instead of nucleotide sequences.
    * - ``--phenotype``/``-p``
@@ -137,7 +137,9 @@ the phylogeny on **informative.fasta**.
    this file is a tree, whereas data with conflicting phylogenetic
    signals will result in a tree-like network. This type of tree/network
    can be visualized with a tool like  `SplitsTree
-   <https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/splitstree/>`_
+   <https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-
+   fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/
+   software/splitstree/>`_
 
 --------------
 
@@ -164,7 +166,9 @@ evolutionary distance between two genomes increases. So in the case of
 more distant genomes, the depicted clades are still correct but the
 extreme long branch lengths make the tree hard to decipher. To normalize
 the numbers, we implemented the `MASH distance
-<https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/splitstree/>`_.
+<https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-
+fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/
+software/splitstree/>`_.
 Distance = -1/k \* ln(J), where k is the *k*-mer length and J is the Jaccard
 index (of distinct *k*-mers).
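+
+As a quick illustrative example with made-up numbers: for k = 21 and J = 0.9,
+the distance is -(1/21) \* ln(0.9) ≈ 0.005.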
 
@@ -373,7 +377,7 @@ available in a pangenome database.
    2,Salmonella enteritidis NCTC 12694
    3,
    4,Salmonella paratyphi NCTC 5702
-   5, 
+   5,
 
 Relevant literature
 ~~~~~~~~~~~~~~~~~~~
@@ -410,8 +414,8 @@ method that follow the rules for genetic nomenclature, so there are no
 differences in the naming of genes.
 
 To gain insight in which genes are appropriate for this analysis, run
-:ref:`gene_classification <user_guide/characterize:gene classification>` with the
-``--mlsa`` argument. This method creates a list of genes that have
+:ref:`gene_classification <user_guide/characterize:gene classification>` with
+the ``--mlsa`` argument. This method creates a list of genes that have the
 same gene name, are present in all (selected) genomes and are placed in
 the single-copy homology group. Using genes from this list guarantees a
 successful MLSA.
@@ -489,8 +493,8 @@ Step 2 Concatenate genes
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
 Concatenate sequences obtained by
-:ref:`mlsa_find_genes <user_guide/phylogeny:step 1 search for genes>` into a single
-sequence per genome. The ``--genes`` argument is required, but the
+:ref:`mlsa_find_genes <user_guide/phylogeny:step 1 search for genes>` into a
+single sequence per genome. The ``--genes`` argument is required, but the
 selection of gene names is allowed to be a sub-selection of the earlier
 selection.
 
@@ -749,8 +753,8 @@ Create tree template
 Creates 'ring' and 'colored range' ITOL templates based on phenotypes
 for the visualization of phylogenies in iTOL. Phenotypes must already be
 included in the pangenome with the
-:ref:`add_phenotypes <user_guide/construct:add phenotypes>` functionality. How to
-use the template files in iTOL can be found in one of the
+:ref:`add_phenotypes <user_guide/construct:add phenotypes>` functionality. How
+to use the template files in iTOL is explained in one of the
 :ref:`tutorials <tutorial/tutorial_part5:create itol templates>`.
 
 If you run this function without a ``--phenotype`` argument, templates
diff --git a/docs/source/user_guide/query.rst b/docs/source/user_guide/query.rst
index 93db3c132cfd0a5a775e3f37eb5fd63dad5825dc..9bf74e2854a5f9d05a626c7639e007e17490a32f 100644
--- a/docs/source/user_guide/query.rst
+++ b/docs/source/user_guide/query.rst
@@ -108,7 +108,7 @@ genome**
 
 .. code:: text
 
-   MATCH (n:homology_group)--(m:mRNA)--(g:gene) where n.num_members = 1 and g.genome = 1 RETURN g 
+   MATCH (n:homology_group)--(m:mRNA)--(g:gene) where n.num_members = 1 and g.genome = 1 RETURN g
 
 **Retrieve unique GO identifiers for mRNA’s with a signal peptide**
 
diff --git a/pom.xml b/pom.xml
index 7b0cbeefe4a45800b062d208d1da537a6d2c46a4..be908305967ae5e5355f55126f06a526e573269e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
 
     <groupId>nl.wur.bif</groupId>
     <artifactId>pantools</artifactId>
-    <version>4.0.0</version>
+    <version>4.1.0</version>
 
     <properties>
         <maven.compiler.source>8</maven.compiler.source>
@@ -109,6 +109,23 @@
             <artifactId>log4j-core</artifactId>
             <version>2.18.0</version>
         </dependency>
+        <dependency>
+            <groupId>com.esotericsoftware.kryo</groupId>
+            <artifactId>kryo5</artifactId>
+            <version>5.3.0</version>
+        </dependency>
+        <!-- https://mvnrepository.com/artifact/com.google.guava/guava -->
+        <dependency>
+            <groupId>com.google.guava</groupId>
+            <artifactId>guava</artifactId>
+            <version>31.1-jre</version>
+        </dependency>
+        <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-csv -->
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-csv</artifactId>
+            <version>1.9.0</version>
+        </dependency>
     </dependencies>
 
     <build>
diff --git a/src/main/java/nl/wur/bif/pantools/cli/ANI.java b/src/main/java/nl/wur/bif/pantools/cli/ANI.java
index 38624a55db1a46a1282635786820e5799aaf42a3..f594f9871ecbc91f783e2701c393cea303a10d84 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/ANI.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/ANI.java
@@ -39,9 +39,12 @@ public class ANI implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, threadNumber, selectGenomes);
+
         pantools.setPangenomeGraph("pangenome");
         setGlobalParameters(); //TODO: use local parameters instead
+
         phylogeny.calculate_ani();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/AddAnnotations.java b/src/main/java/nl/wur/bif/pantools/cli/AddAnnotations.java
index e97abeab0129ae1047d9b3e16ec6f4ceaaa4ba64..2cc33027816a0f804701c83ccc7863f099ad0ea7 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/AddAnnotations.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/AddAnnotations.java
@@ -1,6 +1,8 @@
 package nl.wur.bif.pantools.cli;
 
 import nl.wur.bif.pantools.cli.validation.BeanValidation;
+import nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
@@ -8,7 +10,6 @@ import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
 import static nl.wur.bif.pantools.utils.Globals.*;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -34,9 +35,12 @@ public class AddAnnotations implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this);
+
         pantools.setPangenomeGraph("pangenome");
         setGlobalParameters(); //TODO: use local parameters instead
+
         annLayer.add_annotations();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/AddAntiSMASH.java b/src/main/java/nl/wur/bif/pantools/cli/AddAntiSMASH.java
index 5a079970c39a8658e6f8fbb0dc8fa10360a802f8..9eb175ec669bba69d47cb50e5c838c1c803484f2 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/AddAntiSMASH.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/AddAntiSMASH.java
@@ -1,15 +1,16 @@
 package nl.wur.bif.pantools.cli;
 
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
-import picocli.CommandLine.Model.CommandSpec;
 import picocli.CommandLine.*;
+import picocli.CommandLine.Model.CommandSpec;
 
 import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 
 /**
  * Include antiSMASH gene clusters into the pangenome.
@@ -35,9 +36,12 @@ public class AddAntiSMASH implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this);
+
         pantools.setPangenomeGraph("pangenome");
         setGlobalParameters(); //TODO: use local parameters instead
+
         functionalAnnotations.add_antismash();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/AddFunctions.java b/src/main/java/nl/wur/bif/pantools/cli/AddFunctions.java
index c2078e6f8895e829034456c8a521f9534785c059..d69aad969665400426363c82a101688ce6ce6a4a 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/AddFunctions.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/AddFunctions.java
@@ -1,5 +1,6 @@
 package nl.wur.bif.pantools.cli;
 
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
@@ -7,9 +8,9 @@ import java.io.IOException;
 import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -39,9 +40,12 @@ public class AddFunctions implements Callable<Integer> {
 
     @Override
     public Integer call() throws IOException {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         functionalAnnotations.add_functional_annotations();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/AddGenomes.java b/src/main/java/nl/wur/bif/pantools/cli/AddGenomes.java
index 886ca9ff367ee3884c5fcd956272e31cba6d37f0..e58c03db5a4b067462d25b1dd8e0c2ed5e5e4a9a 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/AddGenomes.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/AddGenomes.java
@@ -1,15 +1,17 @@
 package nl.wur.bif.pantools.cli;
 
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 
+import java.io.IOException;
 import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
-import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
+import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
+import static nl.wur.bif.pantools.utils.Globals.PATH_TO_THE_GENOMES_FILE;
+import static nl.wur.bif.pantools.utils.Globals.seqLayer;
 import static picocli.CommandLine.*;
-import static picocli.CommandLine.Model.*;
+import static picocli.CommandLine.Model.CommandSpec;
 
 /**
  * Include additional genomes into an already available pangenome.
@@ -28,12 +30,18 @@ public class AddGenomes implements Callable<Integer> {
     @InputFile(message = "{file.genomes}")
     Path genomesFile;
 
+    // Optional scratch directory, passed through to seqLayer.add_genomes() below
+    // (presumably for temporary files created while adding genomes)
+    @Option(names = "--scratch-directory")
+    Path scratchDirectory;
+
     @Override
-    public Integer call() {
+    public Integer call() throws IOException {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this);
+
         pantools.setPangenomeGraph("pangenome");
         setGlobalParameters(); //TODO: use local parameters instead
-        seqLayer.add_genomes();
+
+        seqLayer.add_genomes(scratchDirectory);
         return 0;
     }
 
diff --git a/src/main/java/nl/wur/bif/pantools/cli/AddPhenotypes.java b/src/main/java/nl/wur/bif/pantools/cli/AddPhenotypes.java
index 9c3582e819455415e60e433649ba44ed2be17a08..e6c781b5856408244c4505c9fcc5a62ff652d65f 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/AddPhenotypes.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/AddPhenotypes.java
@@ -1,16 +1,17 @@
 package nl.wur.bif.pantools.cli;
 
+import jakarta.validation.constraints.Positive;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 
-import jakarta.validation.constraints.Positive;
 import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
-import static picocli.CommandLine.Model.*;
+import static picocli.CommandLine.Model.CommandSpec;
 
 /**
  * Include phenotype data into the pangenome.
@@ -38,16 +39,11 @@ public class AddPhenotypes implements Callable<Integer> {
     boolean append;
 
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this);
         pantools.setPangenomeGraph();
-        setGlobalParameters(); //TODO: use local parameters instead
-        classification.addPhenotype();
+        classification.addPhenotype(phenotypesFile, bins, append);
         return 0;
     }
 
-    private void setGlobalParameters() {
-        PHENOTYPE = phenotypesFile.toString();
-        APPEND = append;
-        BINS = Integer.toString(bins);
-    }
 }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/BuildPangenome.java b/src/main/java/nl/wur/bif/pantools/cli/BuildPangenome.java
index ea5fef5e20dfda7c463d374574607e2c5acd82d7..79fd7e916c29121e9914e2adf94ecc82c7bf6998 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/BuildPangenome.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/BuildPangenome.java
@@ -1,17 +1,20 @@
 package nl.wur.bif.pantools.cli;
 
+import jakarta.validation.constraints.Max;
+import jakarta.validation.constraints.Min;
+import nl.wur.bif.pantools.cli.mixins.ThreadNumber;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
+import nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
 import nl.wur.bif.pantools.pantools.Pantools;
-import nl.wur.bif.pantools.cli.validation.Constraints.OutputDirectory;
 
-import jakarta.validation.constraints.Max;
+import java.io.IOException;
 import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.MinOrZero;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
-import static picocli.CommandLine.Model.*;
+import static picocli.CommandLine.Model.CommandSpec;
 
 /**
  * Build a pangenome out of a set of genomes.
@@ -23,8 +26,9 @@ public class BuildPangenome implements Callable<Integer> {
 
     @Spec CommandSpec spec;
 
+    @Mixin private ThreadNumber threadNumber;
+
     @ParentCommand
-    @OutputDirectory(directory = "databaseDirectory", message = "{output-database}")
     private Pantools pantools;
 
     @Parameters(descriptionKey = "genomes-file", index = "0+")
@@ -36,17 +40,52 @@ public class BuildPangenome implements Callable<Integer> {
     @Max(value = 255, message = "{max.ksize}")
     int kSize;
 
+    @Option(names = "--scratch-directory")
+    Path scratchDirectory;
+
+    @Option(names = "--num-buckets")
+    @Min(value = 1, message = "{min.num-buckets}")
+    int numBuckets;
+
+    @Option(names = "--transaction-size")
+    @Min(value = 10, message = "{min.transaction-size}")
+    int transactionSize;
+
+    @Option(names = "--num-db-writer-threads")
+    @Min(value = 1, message = "{min.num-db-writer-threads}")
+    int numDbWriterThreads;
+
+    @Option(names = "--cache-size")
+    @Min(value = 0, message = "{min.cache-size}")
+    int cacheSize;
+
+    @Option(names = "--keep-intermediate-files")
+    boolean keepIntermediateFiles;
+
     @Override
-    public Integer call() {
-        new BeanValidation().argValidation(spec, this);
+    public Integer call() throws IOException {
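+        // Create the database directory before the logger; the log file presumably lives inside it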
+        pantools.createDatabaseDirectory();
+        pantools.createLogger(spec);
+        new BeanValidation().argValidation(spec, this, threadNumber);
+
         setGlobalParameters(); //TODO: use local parameters instead
-        seqLayer.initialize_pangenome();
+
+        seqLayer.initialize_pangenome(
+            pantools.getDatabaseDirectory(),
+            scratchDirectory,
+            numBuckets,
+            transactionSize,
+            numDbWriterThreads,
+            cacheSize,
+            keepIntermediateFiles
+        );
         return 0;
     }
 
     private void setGlobalParameters() {
         PATH_TO_THE_GENOMES_FILE = genomesFile.toString();
         K_SIZE = (kSize == 0) ? -1 : kSize;
+        THREADS = threadNumber.getnThreads();
     }
 
     public Pantools getPantools() {return pantools;}
diff --git a/src/main/java/nl/wur/bif/pantools/cli/BuildPanproteome.java b/src/main/java/nl/wur/bif/pantools/cli/BuildPanproteome.java
index b5e8ac99c24296a03d38a46a29b96742e271d552..93b79bf070271b0bb5e3f1dad2d2904d29054c87 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/BuildPanproteome.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/BuildPanproteome.java
@@ -1,16 +1,17 @@
 package nl.wur.bif.pantools.cli;
 
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 
+import java.io.IOException;
 import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
-import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
+import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
+import static nl.wur.bif.pantools.utils.Globals.PATH_TO_THE_PROTEOMES_FILE;
+import static nl.wur.bif.pantools.utils.Globals.proLayer;
 import static picocli.CommandLine.*;
-import static picocli.CommandLine.Model.*;
+import static picocli.CommandLine.Model.CommandSpec;
 
 /**
  * Build a panproteome out of a set of proteins.
@@ -23,7 +24,6 @@ public class BuildPanproteome implements Callable<Integer> {
     @Spec CommandSpec spec;
 
     @ParentCommand
-    @OutputDirectory(directory = "databaseDirectory", message = "{output-database}")
     private Pantools pantools;
 
     @Parameters(descriptionKey = "proteomes-file", index = "0+")
@@ -31,10 +31,14 @@ public class BuildPanproteome implements Callable<Integer> {
     Path proteomesFile;
 
     @Override
-    public Integer call() {
+    public Integer call() throws IOException {
+        pantools.createDatabaseDirectory();
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this);
+
         setGlobalParameters(); //TODO: use local parameters instead
-        proLayer.initialize_panproteome();
+
+        proLayer.initialize_panproteome(pantools.getDatabaseDirectory());
         return 0;
     }
 
diff --git a/src/main/java/nl/wur/bif/pantools/cli/BuscoProtein.java b/src/main/java/nl/wur/bif/pantools/cli/BuscoProtein.java
index 80e415ce7e2d464ab8e45a349cd7bd9daba2f161..da3b40db6059b3b09775f4bbd353e36e58f06160 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/BuscoProtein.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/BuscoProtein.java
@@ -1,21 +1,22 @@
 package nl.wur.bif.pantools.cli;
 
+import jakarta.validation.constraints.Pattern;
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
 import nl.wur.bif.pantools.cli.mixins.ThreadNumber;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 
-import jakarta.validation.constraints.Pattern;
 import java.nio.file.Path;
 import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.Callable;
 
 import static jakarta.validation.constraints.Pattern.Flag.CASE_INSENSITIVE;
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
-import static picocli.CommandLine.Model.*;
+import static picocli.CommandLine.Model.CommandSpec;
 
 /**
  * Identify BUSCO genes in the pangenome.
@@ -63,9 +64,12 @@ public class BuscoProtein implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, selectGenomes, buscoDatasets);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         classification.busco_protein();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/ChangeGrouping.java b/src/main/java/nl/wur/bif/pantools/cli/ChangeGrouping.java
index e49e82770ad546a45f13ab816c8ff1812d116f81..94ae5b16721b4f661d84ca51e753cc3ae7bf2b07 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/ChangeGrouping.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/ChangeGrouping.java
@@ -1,16 +1,16 @@
 package nl.wur.bif.pantools.cli;
 
+import jakarta.validation.constraints.Positive;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
-import picocli.CommandLine.Command;
 
-import jakarta.validation.constraints.Positive;
 import java.util.concurrent.Callable;
 
-import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.utils.Globals.GROUPING_VERSION;
+import static nl.wur.bif.pantools.utils.Globals.proLayer;
 import static picocli.CommandLine.*;
-import static picocli.CommandLine.Model.*;
+import static picocli.CommandLine.Model.CommandSpec;
 
 /**
  * Change the active version of the homology grouping.
@@ -32,9 +32,12 @@ public class ChangeGrouping implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         proLayer.change_active_grouping();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/CompareGO.java b/src/main/java/nl/wur/bif/pantools/cli/CompareGO.java
index e5e29fbe6c210e3a727acec58c9107c0bd2783c5..779dd1b260e267747c360802af76e788d32fe4f3 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/CompareGO.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/CompareGO.java
@@ -1,18 +1,19 @@
 package nl.wur.bif.pantools.cli;
 
+import jakarta.validation.constraints.Size;
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
-import jakarta.validation.constraints.Size;
 import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.Callable;
 
-import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.cli.validation.Constraints.Patterns;
 import static nl.wur.bif.pantools.cli.validation.Constraints.Patterns.Flag.CASE_INSENSITIVE;
+import static nl.wur.bif.pantools.utils.Globals.*;
 import static nl.wur.bif.pantools.utils.StringParser.stringToIntegerList;
 import static picocli.CommandLine.*;
 
@@ -52,9 +53,12 @@ public class CompareGO implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, selectGenomes, identifiers);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         functionalAnnotations.compare_go();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/ConsensusTree.java b/src/main/java/nl/wur/bif/pantools/cli/ConsensusTree.java
index 2d7ad5153f2a95f121379440b924adfa74d912ea..48bf4217bfa2f0f93260b859b0dda7653e7c1efc 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/ConsensusTree.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/ConsensusTree.java
@@ -2,18 +2,15 @@ package nl.wur.bif.pantools.cli;
 
 import nl.wur.bif.pantools.cli.mixins.SelectHmGroups;
 import nl.wur.bif.pantools.cli.mixins.ThreadNumber;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
-import nl.wur.bif.pantools.utils.Utils;
 import picocli.CommandLine.Model.CommandSpec;
 
 import java.io.IOException;
-import java.nio.file.Path;
-import java.util.List;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -40,6 +37,7 @@ public class ConsensusTree implements Callable<Integer> {
 
     @Override
     public Integer call() throws IOException {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, threadNumber, selectHmGroups);
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
diff --git a/src/main/java/nl/wur/bif/pantools/cli/CorePhylogeny.java b/src/main/java/nl/wur/bif/pantools/cli/CorePhylogeny.java
index 5d9749f45bac93002d624a73d0b0859ef6255d44..38e1a036799b8cdde6f72b4211437c6b44e349f9 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/CorePhylogeny.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/CorePhylogeny.java
@@ -1,23 +1,19 @@
 package nl.wur.bif.pantools.cli;
 
+import jakarta.validation.constraints.Pattern;
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
 import nl.wur.bif.pantools.cli.mixins.SelectHmGroups;
 import nl.wur.bif.pantools.cli.mixins.ThreadNumber;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
-import nl.wur.bif.pantools.utils.Utils;
 import picocli.CommandLine.Model.CommandSpec;
 
-import jakarta.validation.constraints.Pattern;
-
 import java.io.IOException;
-import java.nio.file.Path;
-import java.util.List;
 import java.util.concurrent.Callable;
 
 import static jakarta.validation.constraints.Pattern.Flag.CASE_INSENSITIVE;
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -49,6 +45,7 @@ public class CorePhylogeny implements Callable<Integer> {
 
     @Override
     public Integer call() throws IOException {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, threadNumber, selectGenomes, selectHmGroups);
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
diff --git a/src/main/java/nl/wur/bif/pantools/cli/CoreUniqueThresholds.java b/src/main/java/nl/wur/bif/pantools/cli/CoreUniqueThresholds.java
index 33319939fd06f138812863217f6e44dd8b0214d3..69d0d47aea3da678da080c269561549b29b2037d 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/CoreUniqueThresholds.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/CoreUniqueThresholds.java
@@ -1,14 +1,15 @@
 package nl.wur.bif.pantools.cli;
 
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
 import java.util.concurrent.Callable;
 
-import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.utils.Globals.classification;
+import static nl.wur.bif.pantools.utils.Globals.setGenomeSelectionOptions;
 import static picocli.CommandLine.*;
 
 /**
@@ -28,9 +29,12 @@ public class CoreUniqueThresholds implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, selectGenomes);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         classification.core_unique_thresholds();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/CreateTreeTemplate.java b/src/main/java/nl/wur/bif/pantools/cli/CreateTreeTemplate.java
index fc6103538c705e08e2bcea651af981a322c5c6b5..5c2abc01a77923398263cd47a928fadecf2d7eb4 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/CreateTreeTemplate.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/CreateTreeTemplate.java
@@ -1,14 +1,14 @@
 package nl.wur.bif.pantools.cli;
 
+import jakarta.validation.constraints.Positive;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
-import jakarta.validation.constraints.Positive;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -34,9 +34,12 @@ public class CreateTreeTemplate implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         phylogeny.create_tree_templates();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/ExportPangenome.java b/src/main/java/nl/wur/bif/pantools/cli/ExportPangenome.java
new file mode 100644
index 0000000000000000000000000000000000000000..582a5e7128805b755907f87e61f94fa9c8a00255
--- /dev/null
+++ b/src/main/java/nl/wur/bif/pantools/cli/ExportPangenome.java
@@ -0,0 +1,46 @@
+package nl.wur.bif.pantools.cli;
+
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
+import nl.wur.bif.pantools.pangenome.export.PangenomeExporter;
+import nl.wur.bif.pantools.pantools.Pantools;
+import picocli.CommandLine;
+import picocli.CommandLine.Model.CommandSpec;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.concurrent.Callable;
+
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static picocli.CommandLine.*;
+
+/**
+ * Export the pangenome's node properties, relationship properties and sequence node anchors to file.
+ *
+ * @author Roel van Esch, Wageningen University, the Netherlands.
+ */
+@CommandLine.Command(name = "export_pangenome", sortOptions = false, abbreviateSynopsis = true)
+public class ExportPangenome implements Callable<Integer> {
+    @Spec CommandSpec spec;
+
+    @ParentCommand
+    @GraphDatabase
+    private Pantools pantools;
+
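+    // Output files written by the PangenomeExporter constructed in call() below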
+    @Option(names = {"--node-properties-file"}, required = true)
+    Path nodePropertiesFile;
+
+    @Option(names = {"--relationship-properties-file"}, required = true)
+    Path relationshipPropertiesFile;
+
+    @Option(names = {"--sequence-node-anchors-file"}, required = true)
+    Path sequenceNodeAnchorsFile;
+
+    @Override
+    public Integer call() throws IOException {
+        pantools.createLogger(spec);
+        new BeanValidation().argValidation(spec, this);
+        pantools.setPangenomeGraph();
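+        // Constructing the exporter performs the export; the instance itself is not used afterwards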
+        new PangenomeExporter(nodePropertiesFile, relationshipPropertiesFile, sequenceNodeAnchorsFile);
+        return 0;
+    }
+}
diff --git a/src/main/java/nl/wur/bif/pantools/cli/FindGenesByAnnotation.java b/src/main/java/nl/wur/bif/pantools/cli/FindGenesByAnnotation.java
index 66b0e117dac8e1eb3ebbd59564d90d83a5e6c38f..b54ca599f0b979a606acd69f9414e06c1d6bcc81 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/FindGenesByAnnotation.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/FindGenesByAnnotation.java
@@ -1,17 +1,18 @@
 package nl.wur.bif.pantools.cli;
 
+import jakarta.validation.constraints.Size;
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
-import jakarta.validation.constraints.Size;
 import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.cli.validation.Constraints.Patterns;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static nl.wur.bif.pantools.utils.StringParser.stringToIntegerList;
 import static picocli.CommandLine.*;
 
@@ -51,9 +52,12 @@ public class FindGenesByAnnotation implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, selectGenomes, identifiers);
+
         pantools.setPangenomeGraph("pangenome");
         setGlobalParameters(); //TODO: use local parameters instead
+
         classification.find_genes_by_annotation();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/FindGenesByName.java b/src/main/java/nl/wur/bif/pantools/cli/FindGenesByName.java
index 15d4c54096f64ebfe0a9b97e34e034e45428f96c..5619f6ab9fbce04f4b349c3a26718ab754de32fe 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/FindGenesByName.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/FindGenesByName.java
@@ -1,6 +1,7 @@
 package nl.wur.bif.pantools.cli;
 
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
@@ -8,9 +9,8 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -39,9 +39,12 @@ public class FindGenesByName implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
+        new BeanValidation().argValidation(spec, this, selectGenomes);
+
         pantools.setPangenomeGraph("pangenome");
         setGlobalParameters(); //TODO: use local parameters instead
+
         phylogeny.mlsa_find_genes(false);
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/FindGenesInRegion.java b/src/main/java/nl/wur/bif/pantools/cli/FindGenesInRegion.java
index 173fa970ac9aeef344c68b58e54c7e16cae56668..d145434ada90bcd7c061a10fc202fb3cf84f10bb 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/FindGenesInRegion.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/FindGenesInRegion.java
@@ -1,14 +1,15 @@
 package nl.wur.bif.pantools.cli;
 
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
 import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -33,9 +34,12 @@ public class FindGenesInRegion implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this);
+
         pantools.setPangenomeGraph("pangenome");
         setGlobalParameters(); //TODO: use local parameters instead
+
         classification.find_genes_in_region();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/FunctionOverview.java b/src/main/java/nl/wur/bif/pantools/cli/FunctionOverview.java
index 608b954caab54357c2b525631023f7a7accf267a..495974e614700f8aaf2a4fc8bb77362fc4e12d68 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/FunctionOverview.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/FunctionOverview.java
@@ -1,15 +1,16 @@
 package nl.wur.bif.pantools.cli;
 
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
 import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -33,9 +34,12 @@ public class FunctionOverview implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, selectGenomes);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         functionalAnnotations.functional_annotation_overview();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/FunctionalClassification.java b/src/main/java/nl/wur/bif/pantools/cli/FunctionalClassification.java
index d7f66ea31f32a6604e7e4f7ebb02ba89c82273be..95c2d887e9c6cec12ad438df742a7ca94a7a27c9 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/FunctionalClassification.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/FunctionalClassification.java
@@ -1,17 +1,18 @@
 package nl.wur.bif.pantools.cli;
 
+import jakarta.validation.constraints.Max;
+import jakarta.validation.constraints.Min;
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
-import jakarta.validation.constraints.Max;
-import jakarta.validation.constraints.Min;
 import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -48,9 +49,12 @@ public class FunctionalClassification implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, selectGenomes);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         functionalAnnotations.functional_classification();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/GOEnrichment.java b/src/main/java/nl/wur/bif/pantools/cli/GOEnrichment.java
index ccd53c9bea415d50a7a84de53f072ad69f100ec5..3a7e0a98eabfdcefe81fc73a0eb5dabd031c8d76 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/GOEnrichment.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/GOEnrichment.java
@@ -1,21 +1,22 @@
 package nl.wur.bif.pantools.cli;
 
-import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
-import nl.wur.bif.pantools.pantools.Pantools;
-
 import jakarta.validation.constraints.Max;
 import jakarta.validation.constraints.Min;
 import jakarta.validation.constraints.Size;
+import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
+import nl.wur.bif.pantools.pantools.Pantools;
+
 import java.nio.file.Path;
 import java.util.List;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static nl.wur.bif.pantools.utils.StringParser.stringToIntegerList;
 import static picocli.CommandLine.*;
-import static picocli.CommandLine.Model.*;
+import static picocli.CommandLine.Model.CommandSpec;
 
 /**
  * Identify over- or underrepresented GO terms in a set of genes.
@@ -53,9 +54,12 @@ public class GOEnrichment implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, selectGenomes, identifiers);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         functionalAnnotations.go_enrichment();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/GeneClassification.java b/src/main/java/nl/wur/bif/pantools/cli/GeneClassification.java
index 2f0653e0e7d2f0a70f2d6c473dcd51455f592fab..06503bac16baf08401165c3255484af997573219 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/GeneClassification.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/GeneClassification.java
@@ -1,17 +1,17 @@
 package nl.wur.bif.pantools.cli;
 
+import jakarta.validation.constraints.Max;
+import jakarta.validation.constraints.Min;
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 
-import jakarta.validation.constraints.Max;
-import jakarta.validation.constraints.Min;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
-import static picocli.CommandLine.Model.*;
+import static picocli.CommandLine.Model.CommandSpec;
 
 /**
  * Classify the gene repertoire as core, accessory or unique.
@@ -51,9 +51,12 @@ public class GeneClassification implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, selectGenomes);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         classification.gene_classification();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/Group.java b/src/main/java/nl/wur/bif/pantools/cli/Group.java
index 4fd8ced3465a4f5509272b21e9d6998cb9dcc327..e7686e5357801ad0dffc5546b104484b416ae783 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/Group.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/Group.java
@@ -90,9 +90,12 @@ public class Group implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, threadNumber, selectGenomes);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         proLayer.group();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/GroupInfo.java b/src/main/java/nl/wur/bif/pantools/cli/GroupInfo.java
index 25c1273c9d0eb5b3d97cab773d60f3a1d824d496..5eaf1ecc31dc0edd99b2f26e62588242a6de9370 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/GroupInfo.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/GroupInfo.java
@@ -1,20 +1,19 @@
 package nl.wur.bif.pantools.cli;
 
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
 import nl.wur.bif.pantools.cli.mixins.SelectHmGroups;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 
 import java.io.IOException;
-import java.nio.file.Path;
 import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.Callable;
 import java.util.stream.Collectors;
 
-import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static nl.wur.bif.pantools.cli.validation.Constraints.Patterns.Flag.CASE_INSENSITIVE;
+import static nl.wur.bif.pantools.utils.Globals.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -52,17 +51,19 @@ public class GroupInfo implements Callable<Integer> {
 
     @Override
     public Integer call() throws IOException {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, selectGenomes, selectHmGroups);
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
-        classification.homology_group_info();
+
+        classification.homology_group_info(selectHmGroups.getHomologyGroups());
         return 0;
     }
 
     private void setGlobalParameters() throws IOException {
         setGenomeSelectionOptions(selectGenomes);
-        SELECTED_HMGROUPS = selectHmGroups.getHomologyGroups().stream().map(String::valueOf).collect(Collectors.joining(","));
-        System.out.println("SELECTED_HMGROUPS = " + SELECTED_HMGROUPS);
         if (genes != null) SELECTED_NAME = genes.toString().replaceAll("[\\[\\]]", "");
         if (functions != null) SELECTED_LABEL = functions.toString().replaceAll("[\\[\\]]", "");
 
diff --git a/src/main/java/nl/wur/bif/pantools/cli/GroupingOverview.java b/src/main/java/nl/wur/bif/pantools/cli/GroupingOverview.java
index 28a0d0959130688836a4d0a6a615c0e06c277cd8..fbadd8b243e974ae8b7be01513fddee71aebdd4e 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/GroupingOverview.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/GroupingOverview.java
@@ -1,13 +1,14 @@
 package nl.wur.bif.pantools.cli;
 
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
 import java.util.concurrent.Callable;
 
-import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.utils.Globals.FAST;
+import static nl.wur.bif.pantools.utils.Globals.proLayer;
 import static picocli.CommandLine.*;
 
 /**
@@ -29,9 +30,12 @@ public class GroupingOverview implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         proLayer.grouping_overview();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/KmerClassification.java b/src/main/java/nl/wur/bif/pantools/cli/KmerClassification.java
index 76c49967f8d9c4678654ab2a4f101a82f22e8dbd..7560c8a1ffdeb4c83f0c4a320aaf660622f18789 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/KmerClassification.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/KmerClassification.java
@@ -1,17 +1,17 @@
 package nl.wur.bif.pantools.cli;
 
+import jakarta.validation.constraints.Max;
+import jakarta.validation.constraints.Min;
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 
-import jakarta.validation.constraints.Max;
-import jakarta.validation.constraints.Min;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
-import static picocli.CommandLine.Model.*;
+import static picocli.CommandLine.Model.CommandSpec;
 
 /**
  * Calculate the number of core, accessory and unique k-mer sequences.
@@ -51,9 +51,12 @@ public class KmerClassification implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, selectGenomes);
+
         pantools.setPangenomeGraph("pangenome");
         setGlobalParameters(); //TODO: use local parameters instead
+
         classification.kmer_classification2();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/LocateGenes.java b/src/main/java/nl/wur/bif/pantools/cli/LocateGenes.java
index 67134ed0ce6bab03dd1c7a320f8dea6da31bcc12..3cb45f0de0eeebe5691d82da3bcb4ed6b8aba5b5 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/LocateGenes.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/LocateGenes.java
@@ -8,15 +8,14 @@ import picocli.CommandLine.Model.CommandSpec;
 
 import jakarta.validation.constraints.Max;
 import jakarta.validation.constraints.Min;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 
 import java.io.IOException;
-import java.nio.file.Path;
 import java.util.concurrent.Callable;
 import java.util.stream.Collectors;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -56,9 +55,11 @@ public class LocateGenes implements Callable<Integer> {
 
     @Override
     public Integer call() throws IOException {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, selectGenomes, selectHmGroups);
         pantools.setPangenomeGraph("pangenome");
         setGlobalParameters(); //TODO: use local parameters instead
+
         classification.locate_genes();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/MLSA.java b/src/main/java/nl/wur/bif/pantools/cli/MLSA.java
index fea414fcc65ad1a4f93aa31dc60015cc2c2c83d4..af217326fb11097998418e71bd38d221521c691a 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/MLSA.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/MLSA.java
@@ -2,14 +2,14 @@ package nl.wur.bif.pantools.cli;
 
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
 import nl.wur.bif.pantools.cli.mixins.ThreadNumber;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -33,9 +33,12 @@ public class MLSA implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, threadNumber, selectGenomes);
+
         pantools.setPangenomeGraph("pangenome");
         setGlobalParameters(); //TODO: use local parameters instead
+
         phylogeny.run_MLSA();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/MLSAConcatenate.java b/src/main/java/nl/wur/bif/pantools/cli/MLSAConcatenate.java
index 38fdb97e6685d4122702baf822ee95341d5c971b..17679b1a3ba11055a294e3b1be49ffcfa097ec19 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/MLSAConcatenate.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/MLSAConcatenate.java
@@ -2,6 +2,7 @@ package nl.wur.bif.pantools.cli;
 
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
 import nl.wur.bif.pantools.cli.mixins.ThreadNumber;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
@@ -9,9 +10,8 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -41,9 +41,12 @@ public class MLSAConcatenate implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, threadNumber, selectGenomes);
+
         pantools.setPangenomeGraph("pangenome");
         setGlobalParameters(); //TODO: use local parameters instead
+
         phylogeny.mlsa_concatenate();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/MLSAFindGenes.java b/src/main/java/nl/wur/bif/pantools/cli/MLSAFindGenes.java
index 7c81aa04ab58cbc358073549bf6947394c604322..d0f0be9686e3a28ccac01a2fe78af013247a6cd2 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/MLSAFindGenes.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/MLSAFindGenes.java
@@ -1,6 +1,7 @@
 package nl.wur.bif.pantools.cli;
 
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
 import nl.wur.bif.pantools.pantools.Pantools;
 
@@ -9,9 +10,8 @@ import java.util.List;
 import java.util.concurrent.Callable;
 
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import static picocli.CommandLine.*;
-import static picocli.CommandLine.Model.*;
+import static picocli.CommandLine.Model.CommandSpec;
 
 /**
  * Step 1/3 of MLSA. Search and filter suitable genes for the MLSA.
@@ -39,9 +39,12 @@ public class MLSAFindGenes implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, selectGenomes);
+
         pantools.setPangenomeGraph("pangenome");
         setGlobalParameters(); //TODO: use local parameters instead
+
         phylogeny.mlsa_find_genes(true);
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/MSA.java b/src/main/java/nl/wur/bif/pantools/cli/MSA.java
index 6fe7e7016e3a2da653c1fe07a4ef446af90a58dd..258eff7bb3783d9700a2fb9972a8d6a9d399dd01 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/MSA.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/MSA.java
@@ -1,18 +1,16 @@
 package nl.wur.bif.pantools.cli;
 
-import cern.jet.math.Mult;
+import jakarta.validation.constraints.Max;
+import jakarta.validation.constraints.Min;
+import jakarta.validation.constraints.Pattern;
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
 import nl.wur.bif.pantools.cli.mixins.SelectHmGroups;
 import nl.wur.bif.pantools.cli.mixins.ThreadNumber;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pangenome.MultipleSequenceAlignment;
 import nl.wur.bif.pantools.pantools.Pantools;
-import nl.wur.bif.pantools.utils.Utils;
 import picocli.CommandLine.Model.CommandSpec;
 
-import jakarta.validation.constraints.Max;
-import jakarta.validation.constraints.Min;
-import jakarta.validation.constraints.Pattern;
-
 import java.io.IOException;
 import java.nio.file.Path;
 import java.util.Arrays;
@@ -20,9 +18,8 @@ import java.util.List;
 import java.util.concurrent.Callable;
 
 import static jakarta.validation.constraints.Pattern.Flag.CASE_INSENSITIVE;
-import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import static nl.wur.bif.pantools.cli.validation.Constraints.*;
+import static nl.wur.bif.pantools.utils.Globals.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -91,14 +88,15 @@ public class MSA implements Callable<Integer> {
 
     @Override
     public Integer call() throws IOException {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, threadNumber, selectGenomes, selectHmGroups);
         pantools.setPangenomeGraph();
         method = method.replace("-", "_");
         setGlobalParameters(); //TODO: use local parameters instead
+
         MultipleSequenceAlignment msa = new MultipleSequenceAlignment(
                 method,
                 variants,
-                true,
                 selectHmGroups.getHomologyGroups()
         );
         msa.alignSequences(true, true);
@@ -111,7 +109,6 @@ public class MSA implements Callable<Integer> {
         if (functions != null) SELECTED_NAME = functions.toString().replaceAll("[\\[\\]]", "");
         THREADS = threadNumber.getnThreads();
         phenotype_threshold = phenotypeThreshold;
-//        msaMethod = method;
         Mode = (mode == null) ? "0" : mode.toUpperCase();
         PHENOTYPE = phenotype;
         TRIMMING = trimming;
diff --git a/src/main/java/nl/wur/bif/pantools/cli/Map.java b/src/main/java/nl/wur/bif/pantools/cli/Map.java
index f81ae017d880079e0bbdb09b571dd4a481f483ca..72aa4233a47ed206ce2cde6a8887294a6d5b9160 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/Map.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/Map.java
@@ -1,19 +1,17 @@
 package nl.wur.bif.pantools.cli;
 
+import jakarta.validation.constraints.*;
 import nl.wur.bif.pantools.cli.mixins.ThreadNumber;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
 import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
-import jakarta.validation.constraints.*;
-
 import static jakarta.validation.constraints.Pattern.Flag.CASE_INSENSITIVE;
-import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static nl.wur.bif.pantools.cli.validation.Constraints.*;
+import static nl.wur.bif.pantools.utils.Globals.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -178,9 +176,12 @@ public class Map implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, threadNumber, hitsOptions);
+
         pantools.setPangenomeGraph("pangenome");
         setGlobalParameters(); //TODO: use local parameters instead
+
         seqLayer.map_reads();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/Metrics.java b/src/main/java/nl/wur/bif/pantools/cli/Metrics.java
index 1c1912817f4a53d83a8ddc8e86216ab7e335fc5e..6bc6e513e72dd3749bf9393118f6d249da22e5f3 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/Metrics.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/Metrics.java
@@ -1,16 +1,16 @@
 package nl.wur.bif.pantools.cli;
 
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
+import nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
 import nl.wur.bif.pantools.pantools.Pantools;
-import picocli.CommandLine.Command;
 import picocli.CommandLine.Model.CommandSpec;
 
 import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -34,9 +34,12 @@ public class Metrics implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, selectGenomes);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         classification.metrics();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/MoveGrouping.java b/src/main/java/nl/wur/bif/pantools/cli/MoveGrouping.java
index 7a63be333283079ab78c7117f5773b95a54626fc..1bf277270286fb25c8613bbee1f8dc8cd21596f5 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/MoveGrouping.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/MoveGrouping.java
@@ -1,13 +1,14 @@
 package nl.wur.bif.pantools.cli;
 
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
 import java.util.concurrent.Callable;
 
-import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.utils.Globals.FAST;
+import static nl.wur.bif.pantools.utils.Globals.proLayer;
 import static picocli.CommandLine.*;
 
 /**
@@ -29,9 +30,12 @@ public class MoveGrouping implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         proLayer.move_grouping(true);
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/OptimalGrouping.java b/src/main/java/nl/wur/bif/pantools/cli/OptimalGrouping.java
index 7af905e9a73236abb83447a3514bb9363fbcc70a..4db8aaa3305244662f342f7917eb8e0fd0fe5bc6 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/OptimalGrouping.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/OptimalGrouping.java
@@ -61,9 +61,12 @@ public class OptimalGrouping implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, threadNumber, selectGenomes);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         proLayer.optimal_grouping();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/OrderMatrix.java b/src/main/java/nl/wur/bif/pantools/cli/OrderMatrix.java
index bb2293c622f9cad50127b8d178516614dc8fd87f..d5987f2e81ba346b5726166002a49f6f459b4b87 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/OrderMatrix.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/OrderMatrix.java
@@ -1,15 +1,16 @@
 package nl.wur.bif.pantools.cli;
 
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
 import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -36,9 +37,12 @@ public class OrderMatrix implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, selectGenomes);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         classification.order_matrix(true);
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/PangenomeStructure.java b/src/main/java/nl/wur/bif/pantools/cli/PangenomeStructure.java
index b3694dc4a7ad6a2b9ebc5a3ea64b16529676ca69..c3605db48aa4ffb4569994c96f09dec1fb387262 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/PangenomeStructure.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/PangenomeStructure.java
@@ -1,18 +1,17 @@
 package nl.wur.bif.pantools.cli;
 
+import jakarta.validation.constraints.Max;
+import jakarta.validation.constraints.Min;
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
 import nl.wur.bif.pantools.cli.mixins.ThreadNumber;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
-import jakarta.validation.constraints.Max;
-import jakarta.validation.constraints.Min;
-
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -41,7 +40,9 @@ public class PangenomeStructure implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, threadNumber, selectGenomes);
+
         pantools.setPangenomeGraph();
         if (kmer) {
             if (loops == 0) loops = 100; // set default value based on -k
diff --git a/src/main/java/nl/wur/bif/pantools/cli/RemoveAnnotations.java b/src/main/java/nl/wur/bif/pantools/cli/RemoveAnnotations.java
index a52f03777876ea0b2cf62101a3416c817b2d6ba7..2fd6e726d2ba6047f74acb96b6538a64a2cb104f 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/RemoveAnnotations.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/RemoveAnnotations.java
@@ -1,16 +1,17 @@
 package nl.wur.bif.pantools.cli;
 
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 
 import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
-import static picocli.CommandLine.Model.*;
+import static picocli.CommandLine.Model.CommandSpec;
 
 /**
  * Remove all the genomic features that belong to annotations.
@@ -38,9 +39,12 @@ public class RemoveAnnotations implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, exclusiveOptions, exclusiveOptions.selectGenomes);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         annLayer.remove_annotations();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/RemoveFunctions.java b/src/main/java/nl/wur/bif/pantools/cli/RemoveFunctions.java
index c25c451ea082e8ee6bd03498f54bf991c9e163e1..194bf6cecd4c717455e6e462f0d9461d88550dd0 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/RemoveFunctions.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/RemoveFunctions.java
@@ -1,16 +1,15 @@
 package nl.wur.bif.pantools.cli;
 
+import jakarta.validation.constraints.Pattern;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pangenome.Classification;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
-import jakarta.validation.constraints.Pattern;
-
 import java.util.concurrent.Callable;
 
 import static jakarta.validation.constraints.Pattern.Flag.CASE_INSENSITIVE;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
 import static picocli.CommandLine.*;
 
 /**
@@ -34,8 +33,11 @@ public class RemoveFunctions implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this);
+
         pantools.setPangenomeGraph();
+
         final Classification classification = new Classification();
         classification.removeFunctions(this);
         return 0;
diff --git a/src/main/java/nl/wur/bif/pantools/cli/RemoveGrouping.java b/src/main/java/nl/wur/bif/pantools/cli/RemoveGrouping.java
index 3ee4810976dd2dd4b7dd8c1658922997dea0d667..765291a923e50d90d8c7fd9d6dade11f3c03e834 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/RemoveGrouping.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/RemoveGrouping.java
@@ -35,9 +35,12 @@ public class RemoveGrouping implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         proLayer.remove_grouping();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/RemoveNodes.java b/src/main/java/nl/wur/bif/pantools/cli/RemoveNodes.java
index 7fa23a1d05a491faa7d2bd94d109836116af3a38..745e7f2fcfdf35e87f7d0687aa16515242ec467b 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/RemoveNodes.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/RemoveNodes.java
@@ -1,19 +1,19 @@
 package nl.wur.bif.pantools.cli;
 
+import jakarta.validation.constraints.Size;
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.cli.validation.Constraints.ExcludePatterns;
 import nl.wur.bif.pantools.pantools.Pantools;
 
-import jakarta.validation.constraints.Size;
 import java.util.List;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static nl.wur.bif.pantools.utils.StringParser.stringToIntegerList;
 import static picocli.CommandLine.*;
-import static picocli.CommandLine.Model.*;
+import static picocli.CommandLine.Model.CommandSpec;
 
 /**
  * Remove a selection of nodes and their relationships from the pangenome.
@@ -63,9 +63,12 @@ public class RemoveNodes implements Callable<Integer> {
 
     @Override
     public Integer call() {
-        new BeanValidation().argValidation(spec, this, identifiers, identifiers.labelOptions, identifiers.labelOptions.selectGenomes);
+        pantools.createLogger(spec);
+        new BeanValidation().argValidation(spec, this, identifiers, identifiers.labelOptions.selectGenomes);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         classification.remove_nodes();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/RemovePhenotype.java b/src/main/java/nl/wur/bif/pantools/cli/RemovePhenotypes.java
similarity index 80%
rename from src/main/java/nl/wur/bif/pantools/cli/RemovePhenotype.java
rename to src/main/java/nl/wur/bif/pantools/cli/RemovePhenotypes.java
index cad0ef7cf596aaadd363a0af0d2fd463eaa9954f..0cc4b8bd579eed5d57891c89f284f01d5dfce366 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/RemovePhenotype.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/RemovePhenotypes.java
@@ -1,14 +1,14 @@
 package nl.wur.bif.pantools.cli;
 
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -16,8 +16,8 @@ import static picocli.CommandLine.*;
  *
  * @author Roel van Esch, Wageningen University, the Netherlands.
  */
-@Command(name = "remove_phenotype", sortOptions = false)
-public class RemovePhenotype implements Callable<Integer> {
+@Command(name = "remove_phenotypes", sortOptions = false)
+public class RemovePhenotypes implements Callable<Integer> {
 
     @Spec CommandSpec spec;
     @ArgGroup private SelectGenomes selectGenomes;
@@ -31,10 +31,13 @@ public class RemovePhenotype implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, selectGenomes);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
-        //TODO: make function
+
+        classification.removePhenotype();
         return 0;
     }
 
diff --git a/src/main/java/nl/wur/bif/pantools/cli/RenameMatrix.java b/src/main/java/nl/wur/bif/pantools/cli/RenameMatrix.java
index fba0d0be7807c4fdb2f9cee901db4a38ec163668..be237e3e7b62550df5d27bd0b95c163a474811a7 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/RenameMatrix.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/RenameMatrix.java
@@ -1,15 +1,16 @@
 package nl.wur.bif.pantools.cli;
 
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
 import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -42,9 +43,12 @@ public class RenameMatrix implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, selectGenomes);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         classification.rename_matrix();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/RenamePhylogeny.java b/src/main/java/nl/wur/bif/pantools/cli/RenamePhylogeny.java
index 6d11073a62e4db4459b009cd78f2ced949f6a533..5dfa13556f8c30b0d3ad712053ab24957df33d37 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/RenamePhylogeny.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/RenamePhylogeny.java
@@ -1,15 +1,16 @@
 package nl.wur.bif.pantools.cli;
 
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 
 import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
-import static picocli.CommandLine.Model.*;
+import static picocli.CommandLine.Model.CommandSpec;
 
 /**
  * Update or alter the terminal nodes (leaves) of a phylogenetic tree.
@@ -37,9 +38,12 @@ public class RenamePhylogeny implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         phylogeny.rename_phylogeny();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/RetrieveFeatures.java b/src/main/java/nl/wur/bif/pantools/cli/RetrieveFeatures.java
index 05a9addcf12de59e6ada5240926ba7140779ac67..6328d393a2f6923e9113784f1990fa004cbd7619 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/RetrieveFeatures.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/RetrieveFeatures.java
@@ -1,14 +1,14 @@
 package nl.wur.bif.pantools.cli;
 
 import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
 
 /**
@@ -31,9 +31,12 @@ public class RetrieveFeatures implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, selectGenomes);
+
         pantools.setPangenomeGraph("pangenome");
         setGlobalParameters(); //TODO: use local parameters instead
+
         annLayer.retrieve_features();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/RetrieveRegions.java b/src/main/java/nl/wur/bif/pantools/cli/RetrieveRegions.java
index ea4091761803dfb5dbd8bd97b6a0c1614e9d278c..533548f4f288c4bc426d88a2fdfce51f883d1e55 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/RetrieveRegions.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/RetrieveRegions.java
@@ -1,14 +1,16 @@
 package nl.wur.bif.pantools.cli;
 
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
 import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
-import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
+import static nl.wur.bif.pantools.utils.Globals.PATH_TO_THE_REGIONS_FILE;
+import static nl.wur.bif.pantools.utils.Globals.seqLayer;
 import static picocli.CommandLine.*;
 
 /**
@@ -31,9 +33,12 @@ public class RetrieveRegions implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this);
+
         pantools.setPangenomeGraph("pangenome");
         setGlobalParameters(); //TODO: use local parameters instead
+
         seqLayer.retrieve_regions();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/RootPhylogeny.java b/src/main/java/nl/wur/bif/pantools/cli/RootPhylogeny.java
index edb95ee1c156bf69212668c0f831c53b76550e55..46cc0b000d265e587d24c2c8694f6b004d631974 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/RootPhylogeny.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/RootPhylogeny.java
@@ -1,15 +1,16 @@
 package nl.wur.bif.pantools.cli;
 
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 
 import java.nio.file.Path;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static picocli.CommandLine.*;
-import static picocli.CommandLine.Model.*;
+import static picocli.CommandLine.Model.CommandSpec;
 
 /**
  * (Re)root a phylogenetic tree.
@@ -34,9 +35,12 @@ public class RootPhylogeny implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this);
+
         pantools.setPangenomeGraph("pangenome");
         setGlobalParameters(); //TODO: use local parameters instead
+
         phylogeny.reroot_phylogeny();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/ShowGO.java b/src/main/java/nl/wur/bif/pantools/cli/ShowGO.java
index 93b538d86dc48a5db4df7e2732661f164dda23d5..b1501d1906513ee3ddfe1a75f5481e232e03c4e2 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/ShowGO.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/ShowGO.java
@@ -1,16 +1,17 @@
 package nl.wur.bif.pantools.cli;
 
+import jakarta.validation.constraints.Size;
+import nl.wur.bif.pantools.cli.validation.BeanValidation;
 import nl.wur.bif.pantools.pantools.Pantools;
 import picocli.CommandLine.Model.CommandSpec;
 
-import jakarta.validation.constraints.Size;
 import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.Callable;
 
+import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
+import static nl.wur.bif.pantools.cli.validation.Constraints.Patterns;
 import static nl.wur.bif.pantools.utils.Globals.*;
-import nl.wur.bif.pantools.cli.validation.BeanValidation;
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
 import static nl.wur.bif.pantools.utils.StringParser.stringToIntegerList;
 import static picocli.CommandLine.*;
 
@@ -48,9 +49,12 @@ public class ShowGO implements Callable<Integer> {
 
     @Override
     public Integer call() {
+        pantools.createLogger(spec);
         new BeanValidation().argValidation(spec, this, identifiers);
+
         pantools.setPangenomeGraph();
         setGlobalParameters(); //TODO: use local parameters instead
+
         functionalAnnotations.show_go();
         return 0;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/cli/mixins/SelectHmGroups.java b/src/main/java/nl/wur/bif/pantools/cli/mixins/SelectHmGroups.java
index 10410f56d6ffcc364533bc5d6457793a6bef75ca..100921d95576e23dd46f6bdab6947b8131d43657 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/mixins/SelectHmGroups.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/mixins/SelectHmGroups.java
@@ -9,7 +9,6 @@ import static picocli.CommandLine.*;
 
 import java.io.IOException;
 import java.nio.file.Path;
-import java.nio.file.Paths;
 import java.util.List;
 
 /**
@@ -21,7 +20,7 @@ public class SelectHmGroups {
 
     @Option(names = {"-G", "--homology-groups"})
     void setHomologyGroups(String homologyGroupsString) {
-        homologyGroups = StringParser.stringToIntegerList(homologyGroupsString);
+        homologyGroups = StringParser.stringToLongList(homologyGroupsString);
     }
 
     @Option(names = {"-H", "--homology-file"})
@@ -29,9 +28,9 @@ public class SelectHmGroups {
     private Path homologyGroupsFile;
 
     @Size(min = 1, message = "{size.empty.homology-groups}")
-    private List<Integer> homologyGroups;
+    private List<Long> homologyGroups;
 
-    public List<Integer> getHomologyGroups() throws IOException, NumberFormatException {
+    public List<Long> getHomologyGroups() throws IOException, NumberFormatException {
         if (homologyGroups != null) return homologyGroups;
         return (homologyGroupsFile == null) ? null : Utils.parseHmFile(homologyGroupsFile);
     }
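
The hunk above widens homology group identifiers from Integer to Long, matching Neo4j's 64-bit node IDs. A hypothetical sketch of the StringParser.stringToLongList helper assumed by the new setter; the name comes from the diff, but this body is a guess and the real helper's splitting and error handling may differ:

import java.util.ArrayList;
import java.util.List;

final class StringParserSketch {
    // Parse a comma-separated list such as "1,17,42" into 64-bit IDs.
    static List<Long> stringToLongList(String input) {
        final List<Long> values = new ArrayList<>();
        for (String token : input.split(",")) {
            values.add(Long.parseLong(token.trim())); // NumberFormatException surfaces to the caller
        }
        return values;
    }
}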
diff --git a/src/main/java/nl/wur/bif/pantools/cli/mixins/ThreadNumber.java b/src/main/java/nl/wur/bif/pantools/cli/mixins/ThreadNumber.java
index 8545ed0371d0d78ddaef71b79cc285aa85da0822..e2cc63504a77358df4db40a44055fdb919b95726 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/mixins/ThreadNumber.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/mixins/ThreadNumber.java
@@ -2,9 +2,7 @@ package nl.wur.bif.pantools.cli.mixins;
 
 import jakarta.validation.constraints.Positive;
 
-import static nl.wur.bif.pantools.cli.validation.Constraints.*;
-import static nl.wur.bif.pantools.cli.validation.Payloads.Info;
-import static picocli.CommandLine.*;
+import static picocli.CommandLine.Option;
 
 /**
  * Mixin class for '--threads' option.
@@ -23,7 +21,6 @@ public class ThreadNumber {
     }
 
     @Positive(message = "{positive.threads}")
-    @MatchCores(message = "{match.cores}", payload = Info.class)
     private int nThreads;
 
     public int getnThreads() {
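
With @MatchCores and its Info payload removed, the mixin's validation reduces to the standard Jakarta @Positive constraint. A self-contained sketch of checking such a bean with the plain Bean Validation bootstrap API (ThreadNumberSketch is hypothetical; a provider such as Hibernate Validator must be on the classpath):

import jakarta.validation.Validation;
import jakarta.validation.Validator;
import jakarta.validation.constraints.Positive;

final class ThreadNumberSketch {
    @Positive(message = "thread count must be positive")
    int nThreads = -1;

    public static void main(String[] args) {
        Validator validator = Validation.buildDefaultValidatorFactory().getValidator();
        // Prints one violation message for the invalid default above.
        validator.validate(new ThreadNumberSketch())
                 .forEach(v -> System.out.println(v.getMessage()));
    }
}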
diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/BeanValidation.java b/src/main/java/nl/wur/bif/pantools/cli/validation/BeanValidation.java
index 12f4b69dd3921e00ded4408de2f5e738572cd366..f68379be3bdf42142d0d814c3395eadf91135854 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/validation/BeanValidation.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/validation/BeanValidation.java
@@ -1,14 +1,16 @@
 package nl.wur.bif.pantools.cli.validation;
 
+import jakarta.validation.ConstraintValidatorContext;
+import jakarta.validation.ConstraintViolation;
+import jakarta.validation.Validation;
+import jakarta.validation.Validator;
+import nl.wur.bif.pantools.pantools.Pantools;
 import org.hibernate.validator.constraintvalidation.HibernateConstraintValidatorContext;
 import org.hibernate.validator.messageinterpolation.ResourceBundleMessageInterpolator;
 import org.hibernate.validator.resourceloading.PlatformResourceBundleLocator;
-import picocli.CommandLine;
 import picocli.CommandLine.Model.CommandSpec;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
+import picocli.CommandLine.ParameterException;
 
-import jakarta.validation.*;
 import java.util.Set;
 
 /**
@@ -19,35 +21,30 @@ import java.util.Set;
 public class BeanValidation {
 
     /**
-     * Validate jakarta bean validation constraints for a number of objects and their parameters
+     * Validate Jakarta Bean Validation constraints for a number of bean objects and their parameters
      * @param spec CommandSpec from the pantools subcommand to throw a ParameterException
-     * @param objects @Command annotated subcommand class, @ParentCommand annotated Pantools class, @Mixin or @ArgGroup
-     *                annotated classes containing options that need to be validated.
+     * @param beans @Command annotated subcommand class, @ParentCommand annotated Pantools class, @Mixin or @ArgGroup
+     *              annotated classes containing options that need to be validated.
+     * @throws ParameterException if one or more beans are invalid
      */
-    public void argValidation(CommandSpec spec, Object ... objects) {
-
-        final StringBuilder errorMsg = new StringBuilder();
+    public void argValidation(CommandSpec spec, Object ... beans) throws ParameterException {
         final Validator validator = createValidator();
 
-        for (Object object : objects) {
-            if (object == null) continue;
-            Set<ConstraintViolation<Object>> violations = validateObject(object, validator);
-            logViolations(violations, errorMsg);
+        boolean isValid = true;
+        for (Object bean : beans) {
+            if (bean == null) continue;
+            if (!isValidBean(bean, validator)) isValid = false;
         }
 
-        if (errorMsg.length() != 0) {
-            throw new CommandLine.ParameterException(spec.commandLine(), errorMsg.toString().replaceAll("[\n\r]$", ""));
-        }
+        if (!isValid) throw new ParameterException(spec.commandLine(), "One or more input parameters are not valid");
     }
 
-    /**
-     * Validate all constraint annotations within a class
-     * @param object class to validate
-     * @param validator validator to validate the class with
-     * @return set of collected constraint violations
-     */
-    public Set<ConstraintViolation<Object>> validateObject(Object object, Validator validator) {
-        return validator.validate(object);
+    private boolean isValidBean(Object bean, Validator validator) {
+        final Set<ConstraintViolation<Object>> violations = validator.validate(bean);
+        for (ConstraintViolation<?> violation : violations) {
+            Pantools.logger.error(violation.getMessage());
+        }
+        return violations.isEmpty();
     }
 
     /**
@@ -66,48 +63,14 @@ public class BeanValidation {
                 .getValidator();
     }
 
-    /**
-     * Validate all options within a class and collects violation messages as report, warning or error messages.
-     * @param violations set of collected constraint violations
-     */
-    private void logViolations(Set<ConstraintViolation<Object>> violations, StringBuilder errorMsg) {
-
-        final Logger validationLogger = LogManager.getLogger(BeanValidation.class);
-        // for every class, validate all options annotated with jakarta bean validation constraints
-        for (ConstraintViolation<?> violation : violations) {
-            // get constraint payloads
-            Set<Class<? extends Payload>> payloads = violation.getConstraintDescriptor().getPayload();
-
-            boolean unknownPayload  = true;
-            for (Class<? extends Payload> payload : payloads) {
-                if (payload == Payloads.Info.class) {
-                    validationLogger.info(violation.getMessage());
-                    unknownPayload  = false;
-                } else if (payload == Payloads.Warn.class) {
-                    validationLogger.warn(violation.getMessage());
-                    unknownPayload  = false;
-                } else if (payload == Payloads.Error.class) {
-                    validationLogger.error(violation.getMessage());
-                    unknownPayload  = false;
-                    errorMsg.append("ERROR: ").append(violation.getMessage()).append("\n");
-                }
-            }
-
-            // if no payload is specified, ERROR is used
-            if (unknownPayload) {
-                validationLogger.error(violation.getMessage());
-                errorMsg.append("ERROR: ").append(violation.getMessage()).append("\n");
-            }
-        }
-    }
-
     /**
      * Disables the default constraint violation message and sets a new custom message.
      * @param context ConstraintValidatorContext containing the constraint violation message
      * @param msg violation message to be added
      */
     public static void setViolationMessage(ConstraintValidatorContext context, String msg) {
-        HibernateConstraintValidatorContext hibernateContext = context.unwrap(HibernateConstraintValidatorContext.class);
+        HibernateConstraintValidatorContext hibernateContext;
+        hibernateContext = context.unwrap(HibernateConstraintValidatorContext.class);
         hibernateContext.disableDefaultConstraintViolation();
         hibernateContext.buildConstraintViolationWithTemplate(msg).enableExpressionLanguage().addConstraintViolation();
     }
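
The createValidator() chain is only partially visible in this hunk; given the ResourceBundleMessageInterpolator and PlatformResourceBundleLocator imports above, it plausibly bootstraps a Validator along these lines. This is a sketch under that assumption, with "ValidationMessages" as the conventional (assumed) bundle name:

import jakarta.validation.Validation;
import jakarta.validation.Validator;
import org.hibernate.validator.messageinterpolation.ResourceBundleMessageInterpolator;
import org.hibernate.validator.resourceloading.PlatformResourceBundleLocator;

final class ValidatorBootstrapSketch {
    // Build a Validator whose violation messages are resolved from a resource bundle,
    // so constraint templates like "{positive.threads}" map to human-readable text.
    static Validator createValidator() {
        return Validation.byDefaultProvider()
                .configure()
                .messageInterpolator(new ResourceBundleMessageInterpolator(
                        new PlatformResourceBundleLocator("ValidationMessages")))
                .buildValidatorFactory()
                .getValidator();
    }
}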
diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/Constraints.java b/src/main/java/nl/wur/bif/pantools/cli/validation/Constraints.java
index 97df1e01418164dadeb41b3a48668b2aa0f3e582..eb9e30ebf2c127d4cf973142fb5c2cd1f4e165bf 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/validation/Constraints.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/validation/Constraints.java
@@ -21,9 +21,9 @@ public class Constraints {
      * Usage: @GraphDatabase Pantools pantools
      */
     @Documented
-    @Target({ElementType.FIELD, TYPE})
+    @Target({FIELD, TYPE})
     @Retention(RUNTIME)
-    @Constraint(validatedBy = {PantoolsDatabaseValidator.class, GraphDatabaseValidator.class})
+    @Constraint(validatedBy = PantoolsDatabaseValidator.class)
     public @interface GraphDatabase {
 
         String message() default "Directory not found (${validatedValue})";
@@ -171,29 +171,12 @@ public class Constraints {
         int[] value();
     }
 
-    /**
-     * Compare the number of threads and cores
-     * Usage: @MatchCores int threads
-     */
-    @Documented
-    @Target({METHOD, FIELD, ANNOTATION_TYPE, CONSTRUCTOR, PARAMETER, TYPE_USE})
-    @Retention(RUNTIME)
-    @Constraint(validatedBy = MatchCoresValidator.class)
-    public @interface MatchCores {
-
-        String message() default "${validatedValue} is not a valid value";
-
-        Class<?>[] groups() default {};
-
-        Class<? extends Payload>[] payload() default {};
-    }
-
     /**
      * Validates an output directory
      * Usage: <code>@OutputDirectory Path directory</code>
      */
     @Documented
-    @Target({ElementType.FIELD, TYPE})
+    @Target({FIELD, TYPE})
     @Retention(RUNTIME)
     @Constraint(validatedBy = OutputDirectoryValidator.class)
     public @interface OutputDirectory {
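
Every constraint in this file follows the same Jakarta Bean Validation shape as @GraphDatabase above: a @Constraint meta-annotation naming one validator, plus the mandatory message/groups/payload members. A hypothetical, self-contained instance of the pattern (ExistingDirectory and its nested Validator are illustrative names, not part of the codebase):

import jakarta.validation.Constraint;
import jakarta.validation.ConstraintValidator;
import jakarta.validation.ConstraintValidatorContext;
import jakarta.validation.Payload;

import java.lang.annotation.Documented;
import java.lang.annotation.Retention;
import java.lang.annotation.Target;
import java.nio.file.Files;
import java.nio.file.Path;

import static java.lang.annotation.ElementType.FIELD;
import static java.lang.annotation.ElementType.TYPE;
import static java.lang.annotation.RetentionPolicy.RUNTIME;

@Documented
@Target({FIELD, TYPE})
@Retention(RUNTIME)
@Constraint(validatedBy = ExistingDirectory.Validator.class)
public @interface ExistingDirectory {

    String message() default "Directory not found (${validatedValue})";
    Class<?>[] groups() default {};
    Class<? extends Payload>[] payload() default {};

    // Nested validator keeps the example self-contained.
    class Validator implements ConstraintValidator<ExistingDirectory, Path> {
        @Override
        public boolean isValid(Path path, ConstraintValidatorContext context) {
            return path == null || Files.isDirectory(path); // null values are left to @NotNull
        }
    }
}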
diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/Payloads.java b/src/main/java/nl/wur/bif/pantools/cli/validation/Payloads.java
deleted file mode 100644
index 402b1b7d7e9472fa4cfd87d47d0f89fd4c11b090..0000000000000000000000000000000000000000
--- a/src/main/java/nl/wur/bif/pantools/cli/validation/Payloads.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package nl.wur.bif.pantools.cli.validation;
-
-import jakarta.validation.Payload;
-
-/**
- * A set of classes to use as payloads for Bean Validation constraints.
- *
- * @author Roel van Esch, Wageningen University, the Netherlands.
- */
-public class Payloads {
-    /**
-     * Severity payload REPORT: violation messages are printed to System.out
-     */
-    public static class Info implements Payload {}
-
-    /**
-     * Severity payload WARNING: violation messages are printed to System.err
-     */
-    public static class Warn implements Payload {}
-
-    /**
-     * Severity payload ERROR: violation messages are printed to System.err and cause a ParameterException
-     */
-    public static class Error implements Payload {}
-}
diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/GraphDatabaseValidator.java b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/GraphDatabaseValidator.java
deleted file mode 100644
index bb28fa7b33d6f7381b3d3115de9ba49f4b7b8c1b..0000000000000000000000000000000000000000
--- a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/GraphDatabaseValidator.java
+++ /dev/null
@@ -1,57 +0,0 @@
-package nl.wur.bif.pantools.cli.validation.validators;
-
-import nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
-
-import jakarta.validation.ConstraintValidator;
-import jakarta.validation.ConstraintValidatorContext;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.stream.Stream;
-
-import static nl.wur.bif.pantools.cli.validation.BeanValidation.setViolationMessage;
-
-/**
- * Custom ConstraintValidator that validates that the database directory exists and has an existing neo4j graph database.
- *
- * @author Roel van Esch, Wageningen University, the Netherlands.
- */
-public class GraphDatabaseValidator implements ConstraintValidator<GraphDatabase, Path> {
-
-    @Override
-    public void initialize(final GraphDatabase constraintAnnotation) {
-    }
-
-    /**
-     * Validates the output directory and neo4j graph database. The database is invalid when the path to the database
-     * or the neo4j graph database does not exist.
-     *
-     * @param databaseDirectory path to the database directory
-     * @param context ConstraintValidatorContext containing contextual data for a given constraint validator
-     * @return boolean for validity of the constraint
-     */
-    @Override
-    public boolean isValid(Path databaseDirectory, ConstraintValidatorContext context) {
-
-        if (!Files.isDirectory(databaseDirectory)) {
-            setViolationMessage(context, "Pangenome database directory not found");
-            return false;
-        }
-
-        if (!Files.isDirectory(databaseDirectory.resolve("databases/graph.db/"))) {
-            setViolationMessage(context, "Neo4j graph database does not exist");
-            return false;
-        }
-
-        try (Stream<Path> entries = Files.list(databaseDirectory)) {
-            setViolationMessage(context, "Neo4j graph database directory is empty");
-            return entries.findFirst().isPresent();
-        } catch (IOException e) {
-            setViolationMessage(context, e.getMessage());
-            return false;
-        }
-    }
-
-}
-
diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/MatchCoresValidator.java b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/MatchCoresValidator.java
deleted file mode 100644
index b106cad59c393c9372a55d90508b6bec94f7e4e2..0000000000000000000000000000000000000000
--- a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/MatchCoresValidator.java
+++ /dev/null
@@ -1,52 +0,0 @@
-package nl.wur.bif.pantools.cli.validation.validators;
-
-import nl.wur.bif.pantools.cli.validation.Constraints.MatchCores;
-import org.hibernate.validator.constraintvalidation.HibernateConstraintValidatorContext;
-
-import jakarta.validation.ConstraintValidator;
-import jakarta.validation.ConstraintValidatorContext;
-
-/**
- * Custom ConstraintValidator that verifies if the number of assigned threads match the number of available cores on
- * the machine.
- *
- * @author Roel van Esch, Wageningen University, the Netherlands.
- */
-public class MatchCoresValidator implements ConstraintValidator<MatchCores, Integer> {
-
-    int cores;
-
-    @Override
-    public void initialize(MatchCores constraintAnnotation) {
-        cores = Runtime.getRuntime().availableProcessors();
-    }
-
-    /**
-     * Verifies whether the number of threads are equal to the number of available cores.
-     * Sets a custom violation message based on which value is lower.
-     * @param threads number of allowed threads specified with --threads
-     * @param context ConstraintValidatorContext containing contextual data for a given constraint validator
-     * @return true if the number of threads are equal to the number of cores, false otherwise.
-     */
-    @Override
-    public boolean isValid(Integer threads, ConstraintValidatorContext context) {
-        // guard clause if threads and cores are equal
-        if (threads == cores) return true;
-
-        // create template violation message
-        String template = "--threads is ${validatedValue}, which is %s than the number of available cores (${cores})";
-        template = (threads > cores) ? String.format(template, "higher") : String.format(template, "lower");
-
-        // Disable the default violation message
-        HibernateConstraintValidatorContext hibernateContext = context.unwrap(HibernateConstraintValidatorContext.class);
-        hibernateContext.disableDefaultConstraintViolation();
-
-        // Add the custom violation message
-        hibernateContext.addExpressionVariable("cores", cores)
-                .buildConstraintViolationWithTemplate(template)
-                .enableExpressionLanguage()
-                .addConstraintViolation();
-
-        return false;
-    }
-}
diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/PantoolsDatabaseValidator.java b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/PantoolsDatabaseValidator.java
index 897ac40f713aec7bb2cd686f31f59259daaa036e..6144c7ecce473ef4539638f81d0da8b863cfc151 100644
--- a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/PantoolsDatabaseValidator.java
+++ b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/PantoolsDatabaseValidator.java
@@ -1,13 +1,14 @@
 package nl.wur.bif.pantools.cli.validation.validators;
 
+import jakarta.validation.ConstraintValidator;
+import jakarta.validation.ConstraintValidatorContext;
 import nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
 import nl.wur.bif.pantools.pantools.Pantools;
-import org.hibernate.validator.constraintvalidation.HibernateConstraintValidatorContext;
 
-import jakarta.validation.ConstraintValidator;
-import jakarta.validation.ConstraintValidatorContext;
+import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.stream.Stream;
 
 import static nl.wur.bif.pantools.cli.validation.BeanValidation.setViolationMessage;
 
@@ -23,8 +24,8 @@ public class PantoolsDatabaseValidator implements ConstraintValidator<GraphDatab
     }
 
     /**
-     * Validates the output directory and neo4j graph database. The database is invalid when the path to the database
-     * or the neo4j graph database does not exist.
+     * Validates the output directory's neo4j graph database. The database is invalid when the neo4j graph database
+     * does not exist or is empty.
      *
      * @param pantools @ParentCommand annotated Pantools class
      * @param context ConstraintValidatorContext containing contextual data for a given constraint validator
@@ -34,16 +35,17 @@ public class PantoolsDatabaseValidator implements ConstraintValidator<GraphDatab
     public boolean isValid(Pantools pantools, ConstraintValidatorContext context) {
         final Path databaseDirectory = pantools.getDatabaseDirectory();
 
-        if (!Files.isDirectory(databaseDirectory)) {
-            setViolationMessage(context, "Pangenome database directory not found");
+        if (!Files.isDirectory(databaseDirectory.resolve("databases").resolve("graph.db"))) {
+            setViolationMessage(context, "Neo4j graph database does not exist");
             return false;
         }
 
-        if (!Files.isDirectory(databaseDirectory.resolve("databases/graph.db/"))) {
-            setViolationMessage(context, "Neo4j graph database does not exist");
+        try (Stream<Path> entries = Files.list(databaseDirectory.resolve("databases").resolve("graph.db"))) {
+            setViolationMessage(context, "Neo4j graph database directory is empty");
+            return entries.findFirst().isPresent();
+        } catch (IOException e) {
+            setViolationMessage(context, e.getMessage());
             return false;
         }
-
-        return true;
     }
 }
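
The try-with-resources around Files.list above is deliberate: the returned stream holds an open directory handle that must be closed even when only the first entry is inspected. A standalone sketch of that emptiness check (the path literal is hypothetical):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.stream.Stream;

final class EmptyDirCheck {
    static boolean isNonEmptyDirectory(Path dir) {
        if (!Files.isDirectory(dir)) return false;
        try (Stream<Path> entries = Files.list(dir)) { // close the handle when done
            return entries.findFirst().isPresent();    // true as soon as one entry exists
        } catch (IOException e) {
            return false;
        }
    }

    public static void main(String[] args) {
        System.out.println(isNonEmptyDirectory(Paths.get("databases", "graph.db")));
    }
}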
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/AnnotationLayer.java b/src/main/java/nl/wur/bif/pantools/pangenome/AnnotationLayer.java
index e3e242dda8cf9a9e61e832e031fc22076c5aea08..25eafa5d5fe2888ccd7dd6c23243c038606d62b0 100755
--- a/src/main/java/nl/wur/bif/pantools/pangenome/AnnotationLayer.java
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/AnnotationLayer.java
@@ -10,13 +10,19 @@ import htsjdk.tribble.AbstractFeatureReader;
 import htsjdk.tribble.TribbleException;
 import htsjdk.tribble.gff.Gff3Codec;
 import htsjdk.tribble.gff.Gff3Feature;
+
+import nl.wur.bif.pantools.pantools.Pantools;
+import org.neo4j.graphdb.Direction;
+import org.neo4j.graphdb.Label;
+import org.neo4j.graphdb.Node;
+import org.neo4j.graphdb.NotFoundException;
+import org.neo4j.graphdb.Relationship;
+import org.neo4j.graphdb.ResourceIterator;
+import org.neo4j.graphdb.Transaction;
 import htsjdk.tribble.readers.LineIterator;
 import nl.wur.bif.pantools.index.IndexPointer;
 import nl.wur.bif.pantools.index.IndexScanner;
 import nl.wur.bif.pantools.sequence.SequenceScanner;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-import org.neo4j.graphdb.*;
 
 import java.io.*;
 import java.nio.file.Files;
@@ -73,12 +79,6 @@ public class AnnotationLayer {
 
         // Suppress all logging from the htsjdk library for Gff3Codec
         Log.setGlobalLogLevel(Log.LogLevel.ERROR);
-
-        // Current workaround for logging with log4j
-        Logger annotationLogger = LogManager.getLogger(AnnotationLayer.class);
-        if (annotationLogger.isDebugEnabled()) {
-            LOG = true;
-        }
     }
    
     /**
@@ -360,13 +360,10 @@ public class AnnotationLayer {
             }
         }
 
-        // print the length of each sequence if LOG
-        if (LOG) {
-            System.out.println("Sequence lengths for genome " + genome + ":");
-            for (String sequence : sequenceLengths.keySet()) {
-                System.out.println("\t" + sequence + ": " + sequenceLengths.get(sequence));
-            }
-            System.out.println();
+        // print the length of each sequence with Pantools.logger
+        Pantools.logger.debug("Sequence lengths for genome {}:", genome);
+        for (String id : sequenceLengths.keySet()) {
+            Pantools.logger.debug("\t{}: {}", id, sequenceLengths.get(id));
         }
 
         return sequenceLengths;
@@ -393,13 +390,10 @@ public class AnnotationLayer {
             }
         }
 
-        // print the number of each sequence if LOG
-        if (LOG) {
-            System.out.println("Sequence numbers for genome " + genome + ":");
-            for (String sequence : sequenceNumbers.keySet()) {
-                System.out.println("\t" + sequence + ": " + sequenceNumbers.get(sequence));
-            }
-            System.out.println();
+        // print the number of each sequence with Pantools.logger
+        Pantools.logger.debug("Sequence numbers for genome {}:", genome);
+        for (String id : sequenceNumbers.keySet()) {
+            Pantools.logger.debug("\t{}: {}", id, sequenceNumbers.get(id));
         }
 
         return sequenceNumbers;
@@ -479,7 +473,7 @@ public class AnnotationLayer {
     private void addAnnotationToPangenome(int genomeNr, LinkedHashSet<Gff3Feature> annotation, String annotationId,
             String proteinDir, BufferedWriter logFile, Node annotationNode) {
 
-        CoFeatures coFeatures = new CoFeatures(genomeNr, GRAPH_DB, LOG);
+        CoFeatures coFeatures = new CoFeatures(genomeNr, GRAPH_DB);
         int totalFeatures = annotation.size();
         int counter = 0;
         int trsc = 0;
@@ -487,11 +481,7 @@ public class AnnotationLayer {
             Transaction tx = GRAPH_DB.beginTx();
             for (Gff3Feature gff3Feature : annotation) {
                 counter++;
-                if (LOG) {
-                    System.out.println("\tFeature " + counter + "/" + totalFeatures);
-                } else {
-                    System.out.print("\r\tFeature " + counter + "/" + totalFeatures);
-                }
+                Pantools.logger.debug("Feature {}/{}", counter, totalFeatures);
 
                 if (gff3Feature.isTopLevelFeature()) {
 
@@ -541,9 +531,7 @@ public class AnnotationLayer {
 //                            break;
                         default:
                             handleMiscFeature(gff3Feature, featureNode, genomeNr, annotationId, logFile, false, 1);
-                            if (LOG) {
-                                System.out.println("Feature '" + gff3Feature.getID() + "' has unhandled type: " + featureType);
-                            }
+                            Pantools.logger.debug("Feature '{}' has unhandled type '{}'", featureId, featureType);
                             break;
                     }
                 }
@@ -567,9 +555,7 @@ public class AnnotationLayer {
         }
 
         // add coFeatures to pangenome
-        if (LOG) {
-            System.out.println("Adding " + coFeatures.size() + " coFeature (" + coFeatures.getCoFeatures() + ") to pangenome");
-        }
+        Pantools.logger.debug("Adding {} co-features ({}) to pangenome", coFeatures.size(), coFeatures.getCoFeatures());
         coFeatures.addCoFeaturesToPangenome();
 
         try { // start database transaction
@@ -984,9 +970,7 @@ public class AnnotationLayer {
      * @return created feature node in pangenome
      */
     private Node createFeatureNode(Gff3Feature gff3Feature, String featureId, String featureType, int genomeNr, String annotationId, BufferedWriter logFile) {
-        if (LOG) {
-            System.out.println("Creating node for " + featureId);
-        }
+        Pantools.logger.debug("Creating node for {}", featureId);
 
         int[] address = new int[4];
         address[0] = genomeNr;
@@ -1283,9 +1267,7 @@ public class AnnotationLayer {
         HashSet<String> processedGenes = new HashSet<>();
 
         System.out.println("Adding protein sequences for genome " + genome);
-        if (LOG) {
-            System.out.println("geneNodes.size() = " + geneNodes.size());
-        }
+        Pantools.logger.debug("geneNodes.size() = {}", geneNodes.size());
 
         try (BufferedWriter out = new BufferedWriter(new FileWriter(proteinDir + "/proteins_" + genome + ".fasta"))) {
 
@@ -1304,30 +1286,24 @@ public class AnnotationLayer {
                     address[3] -= 1;
                     isCoFeature = geneNode.hasLabel(cofeature_label);
 
-                    if (LOG) {
-                        System.out.println("\n\ngeneNode.getProperty(\"id\") = " + geneId);
-                    }
+                    Pantools.logger.debug("geneNode.getProperty(\"id\") = {}", geneId);
 
                     if (processedGenes.contains(geneId)) {
-                        if (LOG) {
-                            System.out.println("Gene " + geneId + " already processed");
-                        }
+                        Pantools.logger.debug("Gene {} already processed", geneId);
                         continue;
                     }
 
                     genomeSc.get_sub_sequence(geneBuilder, address[0], address[1], address[2], address[3] - address[2] + 1, true);
 
-                    if (LOG) {
-                        System.out.println("address = " + Arrays.toString(address));
-                        System.out.println("geneBuilder.length() = " + geneBuilder.length());
-                        StringBuilder title = new StringBuilder();
-                        genomeSc.get_sequence_title(title, address[0], address[1]);
-                        System.out.println("title = " + title);
-                        StringBuilder seq = new StringBuilder();
-                        genomeSc.get_sub_sequence(seq, address[0], address[1], address[2], address[3] - address[2] + 1, true);
-                        System.out.println("seq = " + seq);
-                        System.out.println("genomeDb.sequence_length[g][s] = " + genomeDb.sequence_length[address[0]][address[1]]);
-                    }
+                    Pantools.logger.trace("address = {}", Arrays.toString(address));
+                    Pantools.logger.trace("geneBuilder.length() = {}", geneBuilder.length());
+                    StringBuilder title = new StringBuilder();
+                    genomeSc.get_sequence_title(title, address[0], address[1]);
+                    Pantools.logger.trace("title = {}", title);
+                    StringBuilder seq = new StringBuilder();
+                    genomeSc.get_sub_sequence(seq, address[0], address[1], address[2], address[3] - address[2] + 1, true);
+                    Pantools.logger.trace("seq = {}", seq);
+                    Pantools.logger.trace("genomeDb.sequence_length[g][s] = {}", genomeDb.sequence_length[address[0]][address[1]]);
 
                     if (geneBuilder.length() == 0) {
                         logFile.write("No sequence for gene " + geneNode.getProperty("id") + " could be found\n");
@@ -1339,9 +1315,7 @@ public class AnnotationLayer {
                         mrnaNode = rel1.getEndNode();
                         mrnaId = (String) mrnaNode.getProperty("id");
 
-                        if (LOG) {
-                            System.out.println("mrnaId = " + mrnaId + " (address: " + Arrays.toString((int[]) mrnaNode.getProperty("address")) + ")");
-                        }
+                        Pantools.logger.debug("mrnaId = {} (address: {})", mrnaId, Arrays.toString((int[]) mrnaNode.getProperty("address")));
 
                         if (mrnaNode.hasRelationship(RelTypes.contributes_to, Direction.INCOMING)) {
                             ++numIsoforms;
@@ -1352,10 +1326,8 @@ public class AnnotationLayer {
                             protein = translate(new StringBuilder(mrnaSeq));
 
                             // log
-                            if (LOG) {
-                                System.out.println("mrnaSeq = " + mrnaSeq);
-                                System.out.println("protein = " + protein);
-                            }
+                            Pantools.logger.trace("mrnaSeq = {}", mrnaSeq);
+                            Pantools.logger.trace("protein = {}", protein);
 
                             if (protein.length() > 0) {
                                 if (protein.endsWith("*")) {
@@ -1371,11 +1343,7 @@ public class AnnotationLayer {
                                 ++num_proteins;
                                 proteinCounter++;
 
-                                if (LOG) {
-                                    System.out.println("\tProtein " + proteinCounter);
-                                } else {
-                                    System.out.print("\r\tProtein " + proteinCounter);
-                                }
+                                Pantools.logger.debug("Protein {}", proteinCounter);
 
                                 if (idOccurrences.containsKey(mrnaId)) {
                                     idOccurrences.put(mrnaId, idOccurrences.get(mrnaId) + 1);
@@ -1489,9 +1457,7 @@ public class AnnotationLayer {
             if (!isCoFeature) {
                 if ((forward && count == 0) || (!forward && count == (numCds - 1))) {
                     phase = (int) cdsNode.getProperty("phase");
-                    if (LOG) {
-                        System.out.println("\t\tCDS " + count + " phase = " + phase);
-                    }
+                    Pantools.logger.trace("\t\tCDS {} phase = {}", count, phase);
                 }
             }
 
@@ -1518,10 +1484,9 @@ public class AnnotationLayer {
             }
 
             // log what was done
-            if (LOG) {
-                System.out.println("CDS node = " + cdsNode.getId() + ") (strand: " + forward + ") at: " + Arrays.toString(address));
-                System.out.println("cdsSequence = " + cdsSequence);
-            }
+            Pantools.logger.trace("CDS node = {} (strand: {}) at: {}",
+                    cdsNode.getId(), forward, Arrays.toString(address));
+            Pantools.logger.trace("cdsSequence = {}", cdsSequence);
 
             // increment count
             count++;
@@ -1534,9 +1499,7 @@ public class AnnotationLayer {
         StringBuilder mrnaSequence = new StringBuilder(cdsBuilder.toString());
 
         // return the mRNA sequence from position defined by phase
-        if (LOG) {
-            System.out.println("phase = " + phase);
-        }
+        Pantools.logger.trace("phase = {}", phase);
         if (phase == -1) { // assume phase is 0 if not specified
             return mrnaSequence.toString();
         } else {
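
The AnnotationLayer hunks replace if (LOG)-guarded System.out printing with parameterized Pantools.logger calls, which defer message formatting until the level is actually enabled, assuming a log4j2-style logger (the {} placeholders above are consistent with one). An illustrative standalone sketch (class name hypothetical):

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

final class LoggingSketch {
    private static final Logger logger = LogManager.getLogger(LoggingSketch.class);

    public static void main(String[] args) {
        int counter = 1, total = 10;
        logger.debug("Feature {}/{}", counter, total); // formatted only when DEBUG is enabled
        logger.trace("phase = {}", -1);                // TRACE suits high-volume per-record detail
    }
}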
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java b/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java
index 0a26a64736ed6d3a7323ee50ee5ed77698dbdc7f..f0e46f104f2d4883e994ce350d6355ad55d3ca46 100644
--- a/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java
@@ -15,7 +15,9 @@ import java.util.LinkedHashMap;
 import cern.jet.math.Arithmetic;
 import nl.wur.bif.pantools.cli.RemoveFunctions;
 import nl.wur.bif.pantools.index.IndexDatabase;
+import nl.wur.bif.pantools.pantools.Pantools;
 import nl.wur.bif.pantools.sequence.SequenceDatabase;
+import nl.wur.bif.pantools.utils.Utils;
 import org.neo4j.graphdb.Label;
 import java.util.Scanner;
 import java.util.stream.Collectors;
@@ -1036,41 +1038,7 @@ public class Classification {
         String frequency = determine_genome_freq_absence_presence_freq(new_copy_number);
         return frequency;
     }
-    
-    /**
-     * Retrieve homology groups via --node or -hm
-     * @return 
-     */
-    public ArrayList<Node> read_homology_groups_for_grp_info() {
-        ArrayList<Node> hm_nodes_list = new ArrayList<>();
-        try (Transaction tx = GRAPH_DB.beginTx()) { // start database transaction
-            if (NODE_ID != null && SELECTED_HMGROUPS == null) {
-                NODE_ID = NODE_ID.replace(" ","");
-                String[] hmgroups_array = NODE_ID.split(",");
-                for (String hmgroup : hmgroups_array) {
-                    try {
-                        long hmgroup_id = Long.parseLong(hmgroup);
-                        Node hm_node = GRAPH_DB.getNodeById(hmgroup_id);
-                        hm_nodes_list.add(hm_node);
-                    } catch (NumberFormatException e) {
-                        System.out.println(hmgroup + " included with --node is not a number.\n");
-                        System.exit(1);
-                    }
-                }
-            } else if (NODE_ID == null && SELECTED_HMGROUPS != null) {
-                hm_nodes_list = read_hmgroup_input_file();
-            } else if (NODE_ID == null && SELECTED_HMGROUPS == null) {
-                System.out.println("Provide either the --node or --homology-groups/-hm argument\n");
-                System.exit(1);
-            } else {
-                System.out.println("Provide either the --node or --homology-groups/-hm argument. Not both!\n");
-                System.exit(1);
-            }
-            tx.success();
-        } 
-        return hm_nodes_list;
-    }
-    
+
     /**
      * 
      * @param mrna_gff_id
@@ -1103,32 +1071,32 @@ public class Classification {
      * Optional 
      * -label 
      */
-    public void homology_group_info() {
-        System.out.println("\nReporting all information from selected homology groups\n");
+    public void homology_group_info(List<Long> homologyGroups) {
+        Pantools.logger.info("Reporting all information from selected homology groups");
         StringBuilder output_builder = new StringBuilder();
         HashSet<String> groups_with_function = new HashSet<>();
         HashMap<String, HashSet<String>> groups_with_function_or_name = new HashMap<>();
 
         BufferedWriter nodeBuilder = null;
+        ArrayList<Node> hmNodeList = null;
         create_directory_in_DB("group_info");
         delete_file_in_DB("group_info/group_functional_annotations.txt"); 
         
-
-
         try (Transaction tx = GRAPH_DB.beginTx()) { // start database transaction
             Node pangenome_node = GRAPH_DB.findNodes(pangenome_label).next();
             check_if_panproteome(pangenome_node); // sets PROTEOME boolean that controls functions, retrieves K_size & total_genomes
             create_skip_arrays(false, true); // create skip array if -skip/-ref is provided by user
             retrieve_phenotypes(); // Creates and fills geno_pheno_map and phenotype_map when a phenotype was provided by the user
             check_current_grouping_version();
+            hmNodeList = Utils.findHmNodes(homologyGroups, 0);
             tx.success(); // transaction successful, commit changes
         } catch (NotFoundException nfe) {
-            System.out.println("Unable to start the database\n");
+            Pantools.logger.error("Unable to start the database.");
             System.exit(1);
         }
 
         if (Mode.contains("K-MER")) { // user (that does not look at code) cannot see this mode
-            System.out.println("\r--kmer was selected. Extracting nucleotide nodes from the graph");
+            Pantools.logger.info("K-MER mode was selected. Extracting nucleotide nodes from the graph.");
             indexDb = new IndexDatabase(WORKING_DIRECTORY + INDEX_DATABASE_PATH, "sorted");
             indexSc = new IndexScanner(indexDb);
             genomeDb = new SequenceDatabase(WORKING_DIRECTORY + GENOME_DATABASE_PATH);
@@ -1137,23 +1105,21 @@ public class Classification {
                 nodeBuilder = new BufferedWriter(new FileWriter(WORKING_DIRECTORY + "group_info/group_nodes.txt"));
                 nodeBuilder.write("#To visualize in the Neo4j browser:\n MATCH (n) where id(n) in [ PLACE,NODES,HERE ] return n\n\n");
             } catch (IOException e) {
-                System.out.println("Unable to create " + WORKING_DIRECTORY + "group_info/group_nodes.txt");
+                Pantools.logger.error("Unable to create {}", WORKING_DIRECTORY + "group_info/group_nodes.txt");
                 e.printStackTrace();
                 System.exit(1);
             }
         }
-        
-        ArrayList<Node> hm_node_list = read_homology_groups_for_grp_info();
+
         ArrayList<String> name_list = new ArrayList<>();
         if (SELECTED_NAME != null) {
             name_list = new ArrayList<>(Arrays.asList(SELECTED_NAME.split("\\s*,\\s*")));
         }
-        
 
         try (Transaction tx = GRAPH_DB.beginTx()) { // start database transaction
-            function_overview_per_group(hm_node_list, WORKING_DIRECTORY + "group_info/");
+            function_overview_per_group(hmNodeList, WORKING_DIRECTORY + "group_info/");
             int counter = 0;
-            for (Node hm_node : hm_node_list) {
+            for (Node hm_node : hmNodeList) {
                 HashSet<Node> nucleotideNodes = new HashSet<>();
                 ArrayList<Node> mrna_nodes = new ArrayList<>();
                 counter ++; 
@@ -1162,7 +1128,7 @@ public class Classification {
                 if (!correct) {
                     correct = test_if_correct_label(hm_node, inactive_homology_group_label, false);
                     if (!correct) {
-                        System.out.println("\n\n " + hm_node + " is not an homology group");
+                        Pantools.logger.error("{} is not a homology group.", hm_node);
                         continue;
                     }
                 }
@@ -1187,8 +1153,8 @@ public class Classification {
                 HashSet<Node> function_node_set = new HashSet<>();
                 ArrayList<Integer> psize_list = new ArrayList<>();
                 Iterable<Relationship> relations = hm_node.getRelationships();
+                Pantools.logger.info("Retrieving group information: {}/{} homology groups.", counter, hmNodeList.size());
                 for (Relationship rel : relations) {
-                    System.out.print("\rRetrieving group information: " + counter + "/" + hm_node_list.size() + " homology groups.  ");  
                     Node mrna_node = rel.getEndNode();
                     long mrna_node_id = mrna_node.getId();
                     int[] address = (int[]) mrna_node.getProperty("address");
@@ -1265,7 +1231,7 @@ public class Classification {
                     assert nodeBuilder != null;
                     nodeBuilder.close();
                 } catch (IOException e) {
-                    System.out.println("Unable to close " + WORKING_DIRECTORY + "/group_info/nodes.txt");
+                    Pantools.logger.error("Unable to close {}", WORKING_DIRECTORY + "/group_info/nodes.txt");
                     e.printStackTrace();
                     System.exit(1);
                 }
@@ -1280,21 +1246,19 @@ public class Classification {
      * 
      */
     public void print_output_files_for_grp_info() {
-        System.out.print("\n\nOutput written to:\n "
-                + WORKING_DIRECTORY + "group_info/group_info.txt\n " 
-                + WORKING_DIRECTORY + "group_info/functions_per_group_and_mrna.csv\n "
-                + WORKING_DIRECTORY + "group_info/function_counts_per_group.csv\n ");
-            
+        Pantools.logger.info("Output written to:");
+        Pantools.logger.info(" {}", WORKING_DIRECTORY + "group_info/group_info.txt");
+        Pantools.logger.info(" {}", WORKING_DIRECTORY + "group_info/functions_per_group_and_mrna.csv");
+        Pantools.logger.info(" {}", WORKING_DIRECTORY + "group_info/function_counts_per_group.csv");
+
         if (SELECTED_LABEL != null) {
-            System.out.print(WORKING_DIRECTORY + "group_info/groups_with_function.txt\n ");
+            Pantools.logger.info(" {}", WORKING_DIRECTORY + "group_info/groups_with_function.txt");
         }
         if (SELECTED_NAME != null) {
-            System.out.print(WORKING_DIRECTORY + "group_info/groups_with_name.txt\n ");
-        }  
+            Pantools.logger.info(" {}", WORKING_DIRECTORY + "group_info/groups_with_name.txt");
+        }
         if (Mode.contains("K-MER")) {
-           System.out.print(WORKING_DIRECTORY + "group_info/group_nodes.txt\n\n");
-        } else {
-            System.out.println("");
+            Pantools.logger.info(" {}", WORKING_DIRECTORY + "group_info/group_nodes.txt");
         }
     }
     
@@ -1544,7 +1508,7 @@ public class Classification {
             questionable_buscos = SELECTED_NAME.split(",");
         } 
        
-        String threads_str = report_number_of_threads(true, true); // prints how many threads were selected by user
+        String threads_str = report_number_of_threads(); // prints how many threads were selected by user
         check_if_graphDb_is_available(); // starts up the graph database if needed
         try (Transaction tx = GRAPH_DB.beginTx()) {
             Node pangenome_node = GRAPH_DB.findNodes(pangenome_label).next();
@@ -2355,6 +2319,7 @@ public class Classification {
                 tx.success();
                 System.out.println("\rRemoval was successful");
                 long phenotypeNodeCount = count_nodes(phenotype_label);
+                new File(WORKING_DIRECTORY + "phenotype_overview.txt").delete(); // delete older summary file
                 if (phenotypeNodeCount > 0) { // create a new phenotype_overview.txt if there still are phenotype nodes
                     phenotype_overview();
                 }
@@ -2375,315 +2340,260 @@ public class Classification {
      * --append 
      * --skip or --reference
      */
-    public void addPhenotype() {
+    public void addPhenotype(Path phenotypesFile, int bins, boolean updateNodes) {
         System.out.println("\nPhenotype node creation and appending\n");
-        check_if_graphDb_is_available(); // starts up the graph database if needed
         try (Transaction tx = GRAPH_DB.beginTx()) { // start database transaction
-            Node pangenome_node = GRAPH_DB.findNodes(pangenome_label).next();
-            check_if_panproteome(pangenome_node); // sets PROTEOME boolean that controls functions, retrieves K_size & total_genomes  
-            create_skip_arrays(false, true); // create skip array if -skip/-ref is provided by user
+            Node pangenomeNode = GRAPH_DB.findNodes(pangenome_label).next();
+            check_if_panproteome(pangenomeNode); // sets PROTEOME boolean that controls functions, retrieves K_size & total_genomes
             tx.success(); // transaction successful, commit changes
         } catch (NotFoundException nfe) {
             System.out.println("Unable to start the pangenome database\n");
             System.exit(1);
         }
-        
-        if (PHENOTYPE == null) {
-            System.out.println("\rPlease provide a CSV formatted input file with '--phenotype'\n"); 
-            return;
-        }
 
-        int bins = 3;
-        if (BINS != null) {
-            try {
-                bins = Integer.parseInt(BINS);
-            } catch (NumberFormatException no) {
-                System.out.println("--value " + BINS + "' is not a numerical value\n");
-                System.exit(1);
-            }
-            System.out.println("\r--value was set! Placing numerical values in " + BINS  + " equally sized bins\n");
+        int columnsInHeader = 0, lineCounter = 0;
+        HashMap<Integer, String> propertyPerColumn = new HashMap<>();
+        HashSet<String> phenotypeProperties = new HashSet<>();
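+        // when updateNodes (--append) is not set, phenotype nodes from an earlier run are removed first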
+        if (!updateNodes) {
+            removePreviousPhenotypeNodes();
         }
-        
-        int header_length = 0, line_count = 0;
-        Node[] pheno_node_array = new Node[total_genomes];
-        HashMap<Integer, String> column_value_map = new HashMap<>();
-        HashSet<String> existing_phenotypes = new HashSet<>();
-        ArrayList<String> already_present = new ArrayList<>();
-
-        HashMap<String, ArrayList<Object>> valuesPerPhenotype = new HashMap<>();
+        Node[] phenotypeNodes = getExistingPhenotypeNodes();
         try (Transaction tx = GRAPH_DB.beginTx()) { // start database transaction
-            check_existing_phenotype_nodes(existing_phenotypes, pheno_node_array);
-            try (BufferedReader in = new BufferedReader(new FileReader(PHENOTYPE))) {
-                for (int i = 0; in.ready();) { // go over the lines of the input file 
-                    String line = in.readLine().trim();
-                    String[] line_array = split_csv_line(line);
-                    if (line_count == 0) { // line 0 is the header header 
-                        if (line.contains(";;") || line.contains(",,")) {
-                            System.out.println("The header of your csv file is not correctly formatted");
-                            System.exit(1);
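+            // Expected CSV layout (illustrative example, not taken from a real input file):
+            //   Genome,growth_rate,resistant
+            //   1,0.52,true
+            //   2,0.61,false
+            // The leftmost column must hold genome numbers; every other column is one phenotype.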
+            try (BufferedReader br = Files.newBufferedReader(phenotypesFile)) { // try-with-resources, so the reader is always closed
+                for (String line = br.readLine(); line != null; line = br.readLine()) {
+                    line = line.trim();
+                    String[] lineArray = line.replace(", ", ",").replace(" ,", ",").split(",");
+                    if (lineCounter == 0) { // first line is the header
+                        if (line.contains(",,")) { // header is not allowed to have empty columns
+                            throw new RuntimeException("The header of your csv file is not correctly formatted, no empty column double commas allowed!");
+                        }
+                        if (lineArray.length < 2) {
+                            throw new RuntimeException("The input csv file must have at least two columns. The first column must always be the genome numbers");
                         }
-                        header_length = read_header_phenotype(line_array, existing_phenotypes, already_present, column_value_map);
-                    } else { // rest of the lines
-                        if (header_length < line_array.length) {
-                            System.out.println("The header contained less values as the rest of the file " + header_length + " " + line_array.length);
-                            System.exit(1);
+                        columnsInHeader = lineArray.length;
+                        for (int i = 1; i < lineArray.length; i++) { // starts at 1 because 0 is 'Genome'
+                            String phenotypeProperty = lineArray[i].trim();
+                            propertyPerColumn.put(i, phenotypeProperty);
+                        }
+                    } else {
+                        if (columnsInHeader < lineArray.length) {
+                            throw new RuntimeException("The header (first line) contained less columns as line " + (line+1) + ":" + columnsInHeader + " against" + lineArray.length );
+                        }
+
+                        int genomeNr;
+                        try {
+                            genomeNr = Integer.parseInt(lineArray[0]);
+                        } catch (NumberFormatException numberFormatException){
+                            throw new RuntimeException("Only genome numbers are allowed in the leftmost column. Found " + lineArray[0]);
                         }
-                        Node pheno_node = create_pheno_node_if_not_present(line_array, pheno_node_array);
-                        for (int l = 0; l < header_length; l++) {
-                            set_property_to_pheno_node(line_array, l, column_value_map, already_present, pheno_node, valuesPerPhenotype);
+                        Node phenotypeNode = getPhenotypeNode(genomeNr, phenotypeNodes);
+                        for (int i = 1; i < columnsInHeader; i++) { // skip first column because it holds genome numbers
+                            Object phenotypeValue;
+                            if (i < lineArray.length) {
+                                phenotypeValue = lineArray[i];
+                            } else { // happens when the last column is empty on some rows
+                                phenotypeValue = "";
+                            }
+                            String phenotypeProperty = propertyPerColumn.get(i);
+                            writeValueToPhenotypeNode(phenotypeValue, phenotypeNode, phenotypeProperty);
+                            phenotypeProperties.add(phenotypeProperty);
                         }
                     }
-                    line_count ++;
-                } 
-                System.out.println("\rSuccessfully created phenotype nodes for " + (line_count-1) + " genomes!");
+                    lineCounter ++;
+                }
             } catch (IOException ioe) {
-                System.out.println("\nFailed to read: " + PHENOTYPE + "\n");
-                System.exit(1);
+                throw new RuntimeException("Something went wrong while reading: " + phenotypesFile);
             }
             tx.success(); // transaction successful, commit changes
         }
-        binPhenotypeValues(valuesPerPhenotype, bins);
+        binPhenotypeValues(phenotypeProperties, bins, phenotypeNodes);
+        String createdOrUpdated = "created";
+        if (updateNodes) {
+            createdOrUpdated = "updated";
+        }
+
+        System.out.println("\rSuccessfully " + createdOrUpdated + " phenotype nodes for " + (lineCounter-1) + " genomes!");
         phenotype_overview();
         GRAPH_DB.shutdown();
-    } 
+    }
 
-    public void binPhenotypeValues(HashMap<String, ArrayList<Object>> valuesPerPhenotype, int bins) {
-        HashMap<Integer, Node> genomePhenoNode = new HashMap<>();
-        boolean first = true;
-        try (Transaction tx = GRAPH_DB.beginTx()) { // start database transaction
-            ResourceIterator<Node> phenoNodes = GRAPH_DB.findNodes(phenotype_label);
-            while (phenoNodes.hasNext()) {
-                Node phenoNode = phenoNodes.next();
-                int genomeNr = (int) phenoNode.getProperty("genome");
-                genomePhenoNode.put(genomeNr, phenoNode);
-            }
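+    /**
+     * Store a phenotype value at position genomeNr-1 of the per-phenotype array,
+     * creating the array on first use.
+     * @param genomeNr genome number (1-based)
+     * @param valuesPerPhenotype map from phenotype name to one value per genome
+     * @param phenotypeProperty phenotype (property key) the value belongs to
+     * @param phenotypeValue value to store
+     */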
+    private void updateValuesPerPhenotype(int genomeNr, HashMap<String, Object[]> valuesPerPhenotype,
+                                          String phenotypeProperty, String phenotypeValue) {
 
+        Object[] array = valuesPerPhenotype.get(phenotypeProperty);
+        if (array == null) {
+            array = new Object[total_genomes];
+        }
+        array[genomeNr-1] = phenotypeValue;
+        valuesPerPhenotype.put(phenotypeProperty, array);
+    }
 
-            for (String key : valuesPerPhenotype.keySet()) {
-                ArrayList<Object> valuesList = valuesPerPhenotype.get(key);
-                double lowest = 999999999;
-                double highest = -999999999;
-                for (Object value : valuesList) {
-                    try {
-                        double d = Double.parseDouble((String) value);
-                        if (d > highest) {
-                            highest = d;
-                        }
-                        if (d < lowest) {
-                            lowest = d;
-                        }
-                    } catch (Exception ex) {
-                        //System.out.println("no " + value);
-                    }
-                }
-                if (lowest == 999999999) {
+    /**
+     * Bin the numerical phenotype values; only the recently added properties are considered.
+     * @param phenotypeProperties all phenotype properties part of the current analysis
+     * @param bins number of bins
+     * @param phenotypeNodes array with phenotype nodes on genomeNr-1 positions
+     */
+    private void binPhenotypeValues(HashSet<String> phenotypeProperties, int bins, Node[] phenotypeNodes) {
+        HashMap<String, Object[]> valuesPerPhenotype = retrieveAllPhenotypesAsObject();
+        boolean first = true;
+
+        try (Transaction tx = GRAPH_DB.beginTx()) { // start database transaction
+            for (String phenotypeProperty : phenotypeProperties) {
+                Object[] phenotypeValues = valuesPerPhenotype.get(phenotypeProperty);
+                double[] lowestHighestValue = findHighestLowestValue(phenotypeValues);
+                if (lowestHighestValue == null) { // null when not every value is a number
                     continue;
                 }
-                double difference = highest - lowest;
-                double step = difference / bins;
-                ArrayList<Double> levels = new ArrayList<>();
-                levels.add(lowest);
-                for (int i = 1; i < bins; i++) {
-                    levels.add((i * step) + lowest);
-                }
-                levels.add(highest);
+
+                ArrayList<Double> levels = determineBinLevels(lowestHighestValue[0], lowestHighestValue[1], bins);
                 if (first) {
-                    System.out.println("\nPhenotypes with numerical values recognized! " +
-                            "If the values do not exactly match they are not considered to be different phenotypes.\n" +
-                            "Extra phenotypes are generated that for equally sized bins on " + bins + " levels.\n");
+                    System.out.println("\nPhenotypes with only numerical values recognized. " +
+                            "Numbers are considered different phenotypes if they do not exactly match!\n" +
+                            "Extra phenotypes are generated which place these values in " + bins + " equally sized bins");
                     first = false;
                 }
-                System.out.println(key  + " -> " + key + "_binned. Level ranges " + levels);
-                int genomeNr = 0;
-                for (Object value : valuesList) {
-                    genomeNr++;
-                    Node phenoNode = genomePhenoNode.get(genomeNr);
-                    phenoNode.removeProperty(key + "_binned");
+                System.out.println(" Phenotype '" + phenotypeProperty + "_binned' range of bins: " + levels.toString().replace("[","").replace("]",""));
+                for (int i = 0; i < phenotypeValues.length; i++) {
+                    Object value = phenotypeValues[i];
+                    if (value == null) {
+                        continue;
+                    }
+                    Node phenotypeNode = phenotypeNodes[i];
+                    phenotypeNode.removeProperty(phenotypeProperty + "_binned");
                     try {
-                        double d = Double.parseDouble((String) value);
-                        for (int i = 0; i < levels.size() - 1; i++) {
-                            if (d >= levels.get(i) && d <= levels.get(i + 1)) {
-                                phenoNode.setProperty(key + "_binned", "bin " + (i + 1));
+                        double number = Double.parseDouble((String) value);
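+                        // a value on a bin boundary matches two ranges; the break assigns it to the lower bin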
+                        for (int l = 0; l < levels.size() - 1; l++) {
+                            if (number >= levels.get(l) && number <= levels.get(l + 1)) {
+                                phenotypeNode.setProperty(phenotypeProperty + "_binned", "bin " + (l + 1));
                                 break;
                             }
                         }
-                    } catch (Exception ex) {
-                        phenoNode.setProperty(key + "_binned", "Unknown");
+                    } catch (NumberFormatException ex) {
+                        phenotypeNode.setProperty(phenotypeProperty + "_binned", "Unknown");
                     }
                 }
             }
             tx.success();
         }
-    }
-
-    /**
-     * Removes additional spaces around commas or semicolons 
-     * @param line
-     * @return 
-     */
-    public String[] split_csv_line(String line) {
-        String[] line_array;
-        if (line.contains(";")) {
-            String newline = line.replace("; ", ";").replace(" ;", ";");
-            line_array = newline.split(";");
-        } else {// when seperated with a comma 
-            String newline = line.replace(", ", ",").replace(" ,", ",");
-            line_array = newline.split(",");
+        if (!first) {
+            System.out.println("");
         }
-        return line_array;
     }
-    
-    /**
-     * Read the header of the CSV formatted phenotpe input file 
-     * @param line_array
-     * @param existing_phenotypes
-     * @param already_present
-     * @param column_value_map
-     * @return header length (int)
-     */
-    public int read_header_phenotype(String[] line_array, HashSet<String> existing_phenotypes, ArrayList<String> already_present, 
-            HashMap<Integer, String> column_value_map ) {
-        
-        ArrayList<String> new_phenotypes = new ArrayList<>();
-        int column = 0;
-        
-        boolean ignore_first = true;
-        for (String value : line_array) {
-            if (ignore_first) { // is 'Genome' 
-                ignore_first = false;
-                column ++;
+
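+    /**
+     * Find the lowest and highest numerical value in the array, skipping null entries
+     * (genomes without a value). Returns null as soon as a value cannot be parsed,
+     * so binning is only attempted for phenotypes whose values are all numerical.
+     * @param numbers phenotype values (as String) on genomeNr-1 positions
+     * @return {lowest, highest}, or null if any value is not a number
+     */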
+    private double[] findHighestLowestValue(Object[] numbers) {
+        double lowest = Double.MAX_VALUE, highest = -Double.MAX_VALUE; // -MAX_VALUE, since Double.MIN_VALUE is the smallest positive double
+        for (Object value : numbers) {
+            if (value == null) { // genome was not included, skip entirely
                 continue;
             }
-            String trim_value = value.trim(); 
-            if (existing_phenotypes.contains(trim_value)) {
-                already_present.add(trim_value);
-            } else {
-                new_phenotypes.add(trim_value);
+            try {
+                double doubleValue = Double.parseDouble((String) value);
+                if (doubleValue > highest) {
+                    highest = doubleValue;
+                }
+                if (doubleValue < lowest) {
+                    lowest = doubleValue;
+                }
+            } catch (NumberFormatException numberFormatException) {
+                return null;
             }
-            if (column > 0) { 
-                column_value_map.put(column, trim_value);
-            } 
-            column ++;
-        }
-        if (already_present.size() > 0) {
-            System.out.println("\rThese phenotypes are already present. Only updating their values " + already_present);
-            if (new_phenotypes.size() > 0) {
-                System.out.println(new_phenotypes.size() + " New phenotypes " + new_phenotypes);
-            } 
         }
-        
-        if (line_array.length < 2) {
-            System.out.println("\rThe input file must have at least two columns. The first column must always be the genome numbers\n");
-            System.exit(1);
+        return new double[]{lowest, highest};
+    }
+
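+    /**
+     * Determine the boundaries of equally sized bins between lowest and highest.
+     * Illustrative example: lowest 0.0, highest 9.0 and 3 bins give levels [0.0, 3.0, 6.0, 9.0].
+     * @param lowest lowest observed value
+     * @param highest highest observed value
+     * @param bins number of bins
+     * @return bin boundaries from lowest to highest (bins+1 values)
+     */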
+    private ArrayList<Double> determineBinLevels(double lowest, double highest, int bins) {
+        double difference = highest - lowest;
+        double step = difference / bins;
+        ArrayList<Double> levels = new ArrayList<>();
+        levels.add(lowest);
+        for (int i = 1; i < bins; i++) {
+            levels.add((i * step) + lowest);
         }
-        return line_array.length;
+        levels.add(highest);
+        return levels;
     }
-     
+
     /**
      * Retrieve or create 'phenotype' node of a specific genome
-     * @param line_array
-     * @param pheno_node_array
+     * @param genomeNr genome number (1-based)
+     * @param phenotypeNodes array with phenotype nodes on genomeNr-1 positions
      * @return 
      */
-    public Node create_pheno_node_if_not_present(String[] line_array, Node[] pheno_node_array) {
-        int genome_nr = Integer.parseInt(line_array[0]);
-        Node pheno_node;
-        if (APPEND) { // node already exists 
-            pheno_node = pheno_node_array[genome_nr-1];
+    public Node getPhenotypeNode(int genomeNr, Node[] phenotypeNodes) {
+        Node phenotypeNode;
+        if (phenotypeNodes[genomeNr-1] != null) { // node already exists from previous run
+            phenotypeNode = phenotypeNodes[genomeNr-1];
         } else {
-            pheno_node = GRAPH_DB.createNode(phenotype_label);
-            pheno_node.setProperty("genome", genome_nr);
-        }
-        if (!PROTEOME && !APPEND) {
-            Node genome_node = GRAPH_DB.findNode(genome_label, "number", genome_nr);
-            if (genome_node == null) {
-                System.out.println("\rNo genome has been found with number: " + genome_nr + "\n");
-                System.exit(1);
+            phenotypeNode = GRAPH_DB.createNode(phenotype_label);
+            phenotypeNode.setProperty("genome", genomeNr);
+            if (!PROTEOME) {
+                Node genomeNode = GRAPH_DB.findNode(genome_label, "number", genomeNr);
+                if (genomeNode == null) {
+                    throw new RuntimeException("No genome has been found with number: " + genomeNr);
+                }
+                genomeNode.createRelationshipTo(phenotypeNode, RelTypes.has_phenotype);
             }
-            genome_node.createRelationshipTo(pheno_node, RelTypes.has_phenotype);
+            phenotypeNodes[genomeNr-1] = phenotypeNode;
         }
-        return pheno_node;
+        return phenotypeNode;
     }
-    
+
     /**
-     * Convert the string to the correct type of variable and store in a phenotype node.
-     *
-     * Most of this function can be replaced with "instanceof"
-     *
-     * @param line_array
-     * @param column
-     * @param column_value_map
-     * @param already_present
-     * @param pheno_node 
+     * Store a phenotype value on a phenotype node, replacing any previous value of that property.
+     * @param phenotypeValue value to store (Boolean, Integer, Double or String)
+     * @param phenotypeNode phenotype node to write the property to
+     * @param phenotypeProperty property key (phenotype name)
      */
-    public static String set_property_to_pheno_node(String[] line_array, int column, HashMap<Integer, String> column_value_map,
-            ArrayList<String> already_present, Node pheno_node, HashMap<String, ArrayList<Object>> valuesPerPhenotype) {
-
-        String type = "string";
-        String value;
-        try {
-            value = line_array[column];
-        } catch (ArrayIndexOutOfBoundsException oob) { // This happens when the last column is empty on some rows 
-            value = "";
-        }
-        String testvalue = value.substring(0);
-        boolean test = testvalue.matches("[0-9]+");
-        boolean test1 = testvalue.matches("[A-Z]+");
-        boolean test2 = testvalue.matches(".*[a-zA-Z]+.*");
-        boolean test3 = testvalue.matches(".*[0-9]+.*") && testvalue.contains(".");
-        if (column > 0) { 
-            String current_pheno = column_value_map.get(column);
-            if (current_pheno == null) {
-                return "";
-            }
-            if (APPEND && already_present.contains(current_pheno)) { // remove the current value 
-                pheno_node.removeProperty(current_pheno);
-            }
-            if (value.equals("TRUE") || value.equals("True") || value.equals("true")) { // boolean
-                boolean add = true;
-                pheno_node.setProperty(current_pheno, add);
-            } else if (value.equals("FALSE") || value.equals("False") || value.equals("false")) { // boolean 
-                boolean add = false;
-                pheno_node.setProperty(current_pheno, add);
-            } else if (test) { // integer or long
-                if (value.length() < 10) {
-                    int add = Integer.parseInt(value);
-                    pheno_node.setProperty(current_pheno, add);
-                    type = "integer";
-                } else { 
-                    long add = Long.parseLong(value);
-                    pheno_node.setProperty(current_pheno, add);
-                    type = "long";
-                }
-            } else if (test1) { // String
-                value = value.replace(" ","_").replace("\"","").replace("(","").replace(")","");
-                pheno_node.setProperty(current_pheno, value);
-            } else if (value.equals("") || value.equals(" ")) { // no value
-                pheno_node.setProperty(current_pheno, "?");
-            } else if (test2) { // String
-                value = value.replace(" ","_").replace("\"","").replace("(","").replace(")","");
-                pheno_node.setProperty(current_pheno, value);
-            } else if (test3) { // Double
-                double add = Double.parseDouble(value);
-                pheno_node.setProperty(current_pheno, add);
-                type = "double";
-            } else {
-                value = value.replace(" ","_").replace("\"","").replace("(","").replace(")","");
-                pheno_node.setProperty(current_pheno, value);
-            }
-            try_incr_AL_hashmap(valuesPerPhenotype, current_pheno, value);
+    private void writeValueToPhenotypeNode(Object phenotypeValue, Node phenotypeNode, String phenotypeProperty) {
+        if (phenotypeNode.hasProperty(phenotypeProperty)) {
+            phenotypeNode.removeProperty(phenotypeProperty); // remove the current phenotype value
+        }
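+        // note: addPhenotype currently passes raw String values (or ""), so the Boolean,
+        // Integer and Double branches only apply to callers that pre-parse their values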
+        if (phenotypeValue instanceof Boolean) {
+            phenotypeNode.setProperty(phenotypeProperty, (boolean) phenotypeValue);
+        } else if (phenotypeValue instanceof Integer) {
+            phenotypeNode.setProperty(phenotypeProperty, (int) phenotypeValue);
+        } else if (phenotypeValue instanceof Double) {
+            phenotypeNode.setProperty(phenotypeProperty, (double) phenotypeValue);
+        } else if (phenotypeValue instanceof String) {
+            phenotypeNode.setProperty(phenotypeProperty, (String) phenotypeValue);
+        } else {
+            throw new RuntimeException(phenotypeValue + " was not recognized as a Boolean, Double, Integer or String");
         }
-        return type;
     }
         
     /**
      * Retrieve phenotypes names and values from the phenotype nodes
      * @return 
      */
+    public HashMap<String, Object[]> retrieveAllPhenotypesAsObject() {
+        HashMap<String, Object[]> valuesPerPhenotype = new HashMap<>();
+        try (Transaction tx = GRAPH_DB.beginTx()) { // start database transaction
+            ResourceIterator<Node> phenotypeNodes = GRAPH_DB.findNodes(phenotype_label);
+            while (phenotypeNodes.hasNext()) {
+                Node phenotypeNode = phenotypeNodes.next();
+                int genomeNr = (int) phenotypeNode.getProperty("genome");
+                for (String phenotypeProperty : phenotypeNode.getPropertyKeys()) {
+                    if (phenotypeProperty.equals("genome")) {
+                        continue;
+                    }
+                    updateValuesPerPhenotype(genomeNr, valuesPerPhenotype, phenotypeProperty, String.valueOf(phenotypeNode.getProperty(phenotypeProperty)));
+                }
+            }
+            tx.success();
+        }
+        return valuesPerPhenotype;
+    }
+
+    /**
+     * Retrieve phenotype names and values from the phenotype nodes
+     * @return set of observed values per phenotype name
+     */
     public HashMap<String, HashSet<String>> retrieve_all_phenotypes() {
         HashMap<String, HashSet<String>> values_per_phenotype = new HashMap<>();
         ResourceIterator<Node> pheno_nodes = GRAPH_DB.findNodes(phenotype_label);
-        while (pheno_nodes.hasNext()) { 
+        while (pheno_nodes.hasNext()) {
             Node pheno_node = pheno_nodes.next();
             Iterable<String> prop_keys = pheno_node.getPropertyKeys();
             for (String prop_key : prop_keys) {
@@ -2692,21 +2602,24 @@ public class Classification {
                 }
                 Object value = pheno_node.getProperty(prop_key);
                 String value_str = "";
-                if (value instanceof String) {
-                    value_str = (String) value;
-                } else if (value instanceof Integer) {
+                if (value instanceof Integer) {
                     int value_int = (int) value;
                     value_str = Integer.toString(value_int);
                 } else if (value instanceof Boolean) {
                     boolean value_bool = (boolean) value;
                     value_str = String.valueOf(value_bool);
+                } else if (value instanceof Double) {
+                    double valueDouble = (double) value;
+                    value_str = Double.toString(valueDouble);
+                } else if (value instanceof String) {
+                    value_str = (String) value;
                 } else {
                     System.out.println("something else.. finish function");
                     System.exit(1);
                 }
                 try_incr_hashset_hashmap(values_per_phenotype, prop_key, value_str);
-            } 
-        }  
+            }
+        }
         return values_per_phenotype;
     }
     
@@ -2724,7 +2637,7 @@ public class Classification {
     public void gene_classification() {
         System.out.println("\nCalculating CORE, UNIQUE, and ACCESSORY homology groups\n"); 
         create_directory_in_DB("gene_classification");
-        report_number_of_threads(true, false); // prints how many threads were selected by user
+        report_number_of_threads(); // prints how many threads were selected by user
         check_if_graphDb_is_available(); // starts up the graph database if needed
         try (Transaction tx = GRAPH_DB.beginTx()) { // start database transaction
             Node pangenome_node = GRAPH_DB.findNodes(pangenome_label).next();
@@ -4502,7 +4415,7 @@ public class Classification {
      */
     public void pangenome_size_genes() {
         System.out.println("\nCalculate the pangenome size based on genes\n");
-        report_number_of_threads(true, true); // prints how many threads were selected by user
+        report_number_of_threads(); // prints how many threads were selected by user
         int[][] all_hmgroups;
         int[][] all_core, all_accessory, all_unique;
         create_directory_in_DB("pangenome_size/gene/gains_losses"); 
@@ -4709,7 +4622,7 @@ public class Classification {
         check_if_graphDb_is_available(); // starts up the graph database if needed
         retrieve_number_of_loops_for_size(100);
         
-        report_number_of_threads(true, true); // prints how many threads were selected by user
+        report_number_of_threads(); // prints how many threads were selected by user
         long num_nodes = 0, degen_nodes2 = 0;
         try (Transaction tx = GRAPH_DB.beginTx()) { // start database transaction
             Node pangenome_node = GRAPH_DB.findNodes(pangenome_label).next();
@@ -8185,17 +8098,16 @@ public class Classification {
                 String key = entry.getKey();
                 ArrayList<Integer> value_list = entry.getValue();
                 if (key.startsWith(pheno + ":")) {
-                    key = key.replace(pheno + ":", "");
-                    String output_str = "";
+                    StringBuilder allValues = new StringBuilder();
                     for (int value: value_list) {
-                        output_str += value + ",";
+                        allValues.append(value).append(",");
                     }     
-                    output_builder.append(" ").append(key).append(": ").append(output_str).append("\n");
+                    output_builder.append(" ").append(key.replace(pheno + ":", "")).append(": ").append(allValues).append("\n");
                 }
             }
             output_builder.append("\n");
         }
-        return header.toString().replaceFirst(".$","").replaceFirst(".$","") + "\n" + output_builder.toString();
+        return header.toString().replaceFirst(".$","").replaceFirst(".$","") + "\n\n" + output_builder.toString();
     }
         
     /**
@@ -9218,8 +9130,7 @@ public class Classification {
      * @param msaNames
      */
     public static void create_write_msa_trimmed_output(ArrayList<String> msaNames, String outdirMsa) {
-        System.out.println("");
-        System.out.println("\rReading alignments");
+        Pantools.logger.info("Reading alignments from {} for calculating variable/informative positions.", outdirMsa);
         ArrayList<String> nuc_or_prot_list = prepare_type_list_for_msa(); // list can contain "protein", "nucleotide" or both
         initialize_BLOSUM();
         delete_file_full_path(outdirMsa + "/groups_with_phenotype_specific_changes.txt");
@@ -9244,8 +9155,7 @@ public class Classification {
                 new File(output_path + "output/var_inf_positions/").mkdir(); // create directory
                 new File(output_path + "output/similarity_identity/").mkdir();// create directory
                 HashMap<String, Integer> shared_snps_map = new HashMap<>();
-                System.out.print("\r Reading " + nuc_or_prot + " alignments: " + group_counter + "/" + msaNames.size() +
-                        ". variable/informative sites: " + total_var_inf_sites[0] + "/" + total_var_inf_sites[1]);
+                Pantools.logger.debug("Reading {} alignment of {}: {}/{}. variable/informative sites: {}/{}.", nuc_or_prot, group_id, group_counter, msaNames.size(), total_var_inf_sites[0], total_var_inf_sites[1]);
                 group_counter ++;
 
                 String genome_order = output_path + "/input/genome_order.info";
@@ -9286,9 +9196,8 @@ public class Classification {
             if (nuc_or_prot_cap.equals("Protein")) {
                 additional_spaces = "   ";
             }
-            System.out.println("\r " + nuc_or_prot_cap + " variable sites   " + additional_spaces + ": " + total_var_inf_sites[0] + " (" + groups_with_var_sites.size() + " alignments)"
-                    + "                             \n" // spaces are intentional
-                    + " " + nuc_or_prot_cap +  " informative sites" + additional_spaces +": " + total_var_inf_sites[1] + " (" + groups_with_inf_sites.size() + " alignments)");
+            Pantools.logger.info(" {} variable sites: {} ({} alignments)", nuc_or_prot_cap, total_var_inf_sites[0], groups_with_var_sites.size());
+            Pantools.logger.info(" {} informative sites: {} ({} alignments)", nuc_or_prot_cap, total_var_inf_sites[1], groups_with_inf_sites.size());
         }
     }
 
@@ -9388,7 +9297,7 @@ public class Classification {
         HashMap<String, Relationship> is_similar_rels_map = new HashMap<>();
         for (Relationship rel : rels) {
             Node mrna_node1 = rel.getEndNode();
-            Long mrna_node_id1 = mrna_node1.getId();
+            long mrna_node_id1 = mrna_node1.getId();
             String protein_id1 = (String) mrna_node1.getProperty("protein_ID");
             int genome_nr1 = (int) mrna_node1.getProperty("genome");
             if (skip_array[genome_nr1-1]) {
@@ -9398,8 +9307,8 @@ public class Classification {
             // only one relation but don't know the direction of the relation yet
             for (Relationship is_similar_rel : is_similar_rels) {
                 Node mrna_node2 = is_similar_rel.getEndNode();
-                Long mrna_node_id2 = mrna_node2.getId();
-                if (mrna_node_id1.equals(mrna_node_id2)) {
+                long mrna_node_id2 = mrna_node2.getId();
+                if (mrna_node_id1 == mrna_node_id2) {
                     mrna_node2 = is_similar_rel.getStartNode();
                 }
                 int genome_nr2 = (int) mrna_node2.getProperty("genome");
@@ -9507,7 +9416,7 @@ public class Classification {
             pheno_specific_header.append("# Because --core-threshold is set, the phenotype specific thresholds were lowered to ").append(core_threshold)
                     .append("% (with at least 1 genome) of the original threshold\n");
         }
-        write_string_to_file_full_path(pheno_specific_header.toString() + pheno_specific_output, 
+        write_string_to_file_full_path(pheno_specific_header + pheno_specific_output,
                 output_path + "/phenotype_specific_changes_" +  nuc_or_prot_short + ".txt");
         groups_with_phenotype_specific_changes(pheno_specific_output, output_path, nuc_or_prot_short);
     }
@@ -9519,7 +9428,9 @@ public class Classification {
      * @param output_path
      * @param nuc_or_prot 
      */
-    public static void groups_with_phenotype_specific_changes(String pheno_specific_output, String output_path, String nuc_or_prot) { 
+    public static void groups_with_phenotype_specific_changes(String pheno_specific_output, String output_path, String nuc_or_prot) {
+        StringBuilder pheno_specific_header = new StringBuilder("##Selected phenotype: " + PHENOTYPE + "\n");
+
         String[] line_array = pheno_specific_output.split("\n");
         HashMap<String, HashSet<String>> sequence_phenotype = new HashMap<>();
         String hm_str = "";
@@ -9560,9 +9471,9 @@ public class Classification {
         }
         
         if (check_if_file_exists(output_path + "/groups_with_phenotype_specific_changes.txt")) {
-            appendStringToFileFullPath("\n" + nuc_or_prot_cap + output_str, output_path + "/groups_with_phenotype_specific_changes.txt");
+            appendStringToFileFullPath("\n" + nuc_or_prot_cap + pheno_specific_header + output_str, output_path + "/groups_with_phenotype_specific_changes.txt");
         } else {
-            write_string_to_file_full_path(nuc_or_prot_cap + output_str, output_path + "/groups_with_phenotype_specific_changes.txt");
+            write_string_to_file_full_path(nuc_or_prot_cap + pheno_specific_header + output_str, output_path + "/groups_with_phenotype_specific_changes.txt");
         }
     }
     
@@ -9761,8 +9672,7 @@ public class Classification {
      * @param outdirMsa
      */
     public static void create_write_msa_no_trimming_output(ArrayList<String> msaNames, String outdirMsa) {
-        System.out.println("");
-        System.out.println("\nReading alignments");
+        Pantools.logger.info("Reading alignments from {} for calculating variable/informative positions.", outdirMsa);
         ArrayList<String> nuc_or_prot_list = prepare_type_list_for_msa(); // list can contain "protein", "nucleotide" or both
         initialize_BLOSUM();
         delete_file_full_path(outdirMsa + "/groups_with_phenotype_specific_changes.txt");
@@ -9786,8 +9696,7 @@ public class Classification {
                 new File(outdirMsa + group_id + "/output/similarity_identity/").mkdir(); // create directory
                 new File(outdirMsa + group_id + "/output/var_inf_positions/").mkdir(); // create directory
                 HashMap<String, Integer> shared_snps_map = new HashMap<>();
-                System.out.print("\r Reading " + nuc_or_prot + " alignments: " + group_counter + "/" + msaNames.size() + ". variable/informative: "
-                        + total_var_inf_sites[0] + "/" + total_var_inf_sites[1]);
+                Pantools.logger.debug("Reading {} alignment of {}: {}/{}. variable/informative sites: {}/{}.", nuc_or_prot, group_id, group_counter, msaNames.size(), total_var_inf_sites[0], total_var_inf_sites[1]);
                 group_counter ++;
                 String output_dir1 = outdirMsa + group_id + "/output/";
                 String input_file = output_dir1 + nuc_or_prot_short + ".afa";
@@ -9828,9 +9737,8 @@ public class Classification {
             if (nuc_or_prot_cap.equals("Protein")) {
                 additional_spaces = "   ";
             }
-            System.out.println("\r " + nuc_or_prot_cap + " variable sites   " + additional_spaces + ": " + total_var_inf_sites[0] + " (" + total_var_groups + " alignments)"
-                    + "                             \n" // spaces are intentional
-                    + " " + nuc_or_prot_cap +  " informative sites" + additional_spaces +": " + total_var_inf_sites[1] + " (" + total_inf_groups + " alignments)");
+            Pantools.logger.info(" {} variable sites: {} ({} alignments)", nuc_or_prot_cap, total_var_inf_sites[0], total_var_groups);
+            Pantools.logger.info(" {} informative sites: {} ({} alignments)", nuc_or_prot_cap, total_var_inf_sites[1], total_inf_groups);
         }
     }
     
@@ -9901,9 +9809,9 @@ public class Classification {
         HashMap<String, Integer> pheno_specific_counter = new HashMap<>(); // the number of phenotype specific variations for this alignment
         StringBuilder pheno_specific_sites = new StringBuilder("Phenotype specific " + SNP_or_substi + " per position in the alignment."
                 + " Each row contains: position in the alignment, reference allele/other alleles, phenotype,"
-                + " number of sequences with the phenotype and letter/total sequences with phenotype\n");
+                + " number of sequences with the phenotype and letter/total sequences with phenotype\n\n");
 
-        pheno_log_header.append("Number of sequences per phenotype and required threshold\n");
+        pheno_log_header.append("#Number of sequences per phenotype and required threshold\n");
         HashMap<String, Integer> phenotypeCounts = new HashMap<>();
         for (int genome_nr : genome_order_list) {
             String pheno = geno_pheno_map.get(genome_nr);
@@ -9911,13 +9819,13 @@ public class Classification {
 
         }
 
+        pheno_log_header.append(PHENOTYPE).append("\tNumber of sequences\tThreshold\n");
         for (String phenotype: phenotypeCounts.keySet()){
             int count = phenotypeCounts.get(phenotype);
             int threshold = phenotype_threshold_map.get(phenotype);
-            pheno_log_header.append(phenotype + ": " + count + "," + threshold + "\n");
+            pheno_log_header.append(phenotype).append("\t").append(count).append("\t").append(threshold).append("\n");
         }
-        pheno_log_header.append("\n");
-
+        pheno_log_header.append("=========================================\n\n");
 
 
 
@@ -13408,39 +13316,40 @@ public class Classification {
             write_SB_to_file_in_DB(distribution_output2, "gene_classification/class_distribution2.csv"); 
         }*/
     }
-    
+
     /**
-     * Check if there were any phenotype nodes already.
-     * If --append was given by the user, retrieve all phenotype names (keys)
-     * When --append is not included, remove the phenotype nodes
-     * 
-     * @param existing_phenotypes
-     * @param pheno_node_array 
+     * Collect the existing phenotype nodes from the graph.
+     * @return array with phenotype nodes on genomeNr-1 positions
      */
-    public void check_existing_phenotype_nodes(HashSet<String> existing_phenotypes, Node[] pheno_node_array) {
-        int pheno_node_counter = 0;
-        ResourceIterator<Node> pheno_nodes = GRAPH_DB.findNodes(phenotype_label);
-        while (pheno_nodes.hasNext()) {
-            Node pheno_node = pheno_nodes.next();
-            pheno_node_counter ++;
-            if (APPEND) {
-                int genome_nr = (int) pheno_node.getProperty("genome");
-                pheno_node_array[genome_nr-1] = pheno_node;
-                pheno_node.getPropertyKeys();
-                Iterable<String> prop_keys = pheno_node.getPropertyKeys();
-                for (String key : prop_keys) {
-                    existing_phenotypes.add(key);
-                }
-            } else {
-                Iterable<Relationship> pheno_relations = pheno_node.getRelationships();
-                for (Relationship rel : pheno_relations) {
-                    rel.delete();
-                }
-                pheno_node.delete();
+    private Node[] getExistingPhenotypeNodes() {
+        Node[] phenotypeNodes = new Node[total_genomes];
+        try (Transaction tx = GRAPH_DB.beginTx()) { // start database transaction
+            ResourceIterator<Node> phenotypeNodeIterator = GRAPH_DB.findNodes(phenotype_label);
+            while (phenotypeNodeIterator.hasNext()) {
+                Node phenotypeNode = phenotypeNodeIterator.next();
+                int genomeNr = (int) phenotypeNode.getProperty("genome");
+                phenotypeNodes[genomeNr - 1] = phenotypeNode;
             }
+            tx.success();
         }
-        if (pheno_node_counter == 0) { // no phenotypes nodes are present so the append functionatlities are not required  
-            APPEND = false; 
+        return phenotypeNodes;
+    }
+
+    /**
+     * Delete the relationships of all existing phenotype nodes and the nodes themselves.
+     */
+    private void removePreviousPhenotypeNodes() {
+        try (Transaction tx = GRAPH_DB.beginTx()) { // start database transaction
+            ResourceIterator<Node> phenotypeNodeIterator = GRAPH_DB.findNodes(phenotype_label);
+            while (phenotypeNodeIterator.hasNext()) {
+                Node phenotypeNode = phenotypeNodeIterator.next();
+                Iterable<Relationship> relations = phenotypeNode.getRelationships();
+                for (Relationship relation : relations) {
+                    relation.delete();
+                }
+                phenotypeNode.delete(); // also delete the node itself, not just its relationships
+            }
+            tx.success();
         }
     }
 
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/CoFeatures.java b/src/main/java/nl/wur/bif/pantools/pangenome/CoFeatures.java
index 362340e4d705d79a4d8803254e727328a3f28863..4faf5328a84f6ce5bf8c5c9795c6d3755bd24b31 100644
--- a/src/main/java/nl/wur/bif/pantools/pangenome/CoFeatures.java
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/CoFeatures.java
@@ -1,5 +1,6 @@
 package nl.wur.bif.pantools.pangenome;
 
+import nl.wur.bif.pantools.pantools.Pantools;
 import org.neo4j.graphdb.*;
 import java.util.*;
 
@@ -16,13 +17,11 @@ import static nl.wur.bif.pantools.utils.Globals.cofeature_label;
 public class CoFeatures {
     private final int genomeNr;
     private final GraphDatabaseService graphDb;
-    private final boolean LOG;
     private HashSet<Node> coFeatures;
 
-    public CoFeatures(int genomeNr, GraphDatabaseService graphDb, boolean LOG) {
+    public CoFeatures(int genomeNr, GraphDatabaseService graphDb) {
         this.genomeNr = genomeNr;
         this.graphDb = graphDb;
-        this.LOG = LOG;
         this.coFeatures = new HashSet<>();
     }
 
@@ -73,9 +72,7 @@ public class CoFeatures {
      * co-features.
      */
     public void addCoFeaturesToPangenome() {
-        if (LOG) {
-            System.out.println("Handling " + coFeatures.size() + " co-features (" + coFeatures + ")");
-        }
+        Pantools.logger.debug("Handling {} co-feature ({})", coFeatures.size(), coFeatures);
 
         //convert hashset to iterator and empty the hashset
         Iterator<Node> coFeaturesIterator = coFeatures.iterator();
@@ -90,11 +87,8 @@ public class CoFeatures {
                 Node coFeature = coFeaturesIterator.next();
                 String id = (String) coFeature.getProperty("id");
 
-                if (LOG) {
-                    int[] address = (int[]) coFeature.getProperty("address");
-                    String type = (String) coFeature.getProperty("type");
-                    System.out.println("Co-feature " + id + " (" + type + ") at " + Arrays.toString(address));
-                }
+                Pantools.logger.debug("Co-feature {} ({}) at {}",
+                        id, coFeature.getProperty("type"), Arrays.toString((int[]) coFeature.getProperty("address")));
 
                 // add co-feature to unique co-features
                 uniqueCoFeatures.putIfAbsent(id, new HashSet<>());
@@ -125,15 +119,11 @@ public class CoFeatures {
      * @param coFeatures list of co-feature nodes belonging to the given ID
      */
     private void handleOneCoFeature(String coFeatureId, int genomeNr, HashSet<Node> coFeatures) {
-        if (LOG) {
-            System.out.println("Handling co-feature " + coFeatureId);
-        }
+        Pantools.logger.debug("Handling co-feature {}", coFeatureId);
 
         //continue if there are less than two co-features with the same ID
         if (coFeatures.size() < 2) {
-            if (LOG) {
-                System.out.println("Less than two co-features with ID " + coFeatureId + "; skipping");
-            }
+            Pantools.logger.debug("Less than two co-features with ID {}, skipping", coFeatureId);
             return;
         }
 
@@ -168,9 +158,10 @@ public class CoFeatures {
 
                     //update address of first co-feature
                     coFeature.setProperty("address", newAddress);
-                    if (LOG) {
-                        System.out.println("Updated address of " + coFeature.getProperty("id") + " to " + Arrays.toString(newAddress));
-                    }
+                    Pantools.logger.debug("Updated address of {} to {}", coFeature.getProperty("id"), Arrays.toString(newAddress));
 
                     //add property to co-feature node for easy access to all co-features
                     coFeature.addLabel(cofeature_label);
@@ -179,9 +170,7 @@ public class CoFeatures {
                     Node firstMrnaNode = null;
                     for (Relationship rel : coFeature.getRelationships(RelTypes.is_parent_of)) {
                         Node child = rel.getEndNode();
-                        if (LOG) {
-                            System.out.println("Child of " + coFeature.getProperty("id") + ": " + child.getProperty("id") + " (" + child.getId() + ")");
-                        }
+                        Pantools.logger.debug("Child of {}: {} ({})", coFeature.getProperty("id"), child.getProperty("id"), child.getId());
 
                         // skip if the child appears not to be an mRNA node
                         if (!child.hasLabel(mRNA_label)) continue;
@@ -190,17 +179,13 @@ public class CoFeatures {
                             //update address of mRNA node
                             int[] newMrnaAddress = new int[]{genomeNr, newAddress[1], newAddress[2], newAddress[3]};
                             child.setProperty("address", newMrnaAddress);
-                            if (LOG) {
-                                System.out.println("Updated address of " + child.getProperty("id") + " to " + Arrays.toString(newMrnaAddress));
-                            }
+                            Pantools.logger.debug("Updated address of {} to {}", child.getProperty("id"), Arrays.toString(newMrnaAddress));
 
                             firstMrnaNode = child;
                         } else {
                             for (Relationship childRel : child.getRelationships(RelTypes.is_parent_of)) {
                                 //connect children of mRNA node to first mRNA node
-                                if (LOG) {
-                                    System.out.println("Connecting " + childRel.getEndNode().getProperty("id") + " to " + firstMrnaNode.getProperty("id"));
-                                }
+                                Pantools.logger.debug("Connecting {} to {}", childRel.getEndNode().getProperty("id"), firstMrnaNode.getProperty("id"));
 
                                 Node grandChild = childRel.getEndNode();
 
@@ -214,9 +199,7 @@ public class CoFeatures {
                             }
 
                             //delete mRNA node if it is not the first mRNA node (first delete all relationships)
-                            if (LOG) {
-                                System.out.println("Deleting " + child.getProperty("id") + " (" + child.getId() + ")");
-                            }
+                            Pantools.logger.debug("Deleting {} ({})", child.getProperty("id"), child.getId());
                             for (Relationship childRel : child.getRelationships()) {
                                 childRel.delete();
                             }
@@ -251,31 +234,26 @@ public class CoFeatures {
         try { // start database transaction
             Transaction tx = graphDb.beginTx();
 
-            if (LOG) {
-                System.out.println("Deleting " + coFeature.getProperty("id") + " (" + coFeature.getId() + ") with all its children");
-            }
+            Pantools.logger.debug("Deleting {} ({}) with all its children", coFeature.getProperty("id"), coFeature.getId());
 
             //co-features are connected to mRNA nodes with is_parent_of and codes_for relationships, mRNA nodes are connected to CDS nodes with is_parent_of and contributes_to relationships, and mRNA nodes are connected to exon nodes with is_parent_of relationships
             if (coFeature.hasRelationship(RelTypes.is_parent_of, Direction.OUTGOING)) {
                 for (Relationship rel : coFeature.getRelationships(RelTypes.is_parent_of, Direction.OUTGOING)) {
                     Node mrnaNode = rel.getEndNode();
-                    if (LOG) {
-                        System.out.println("Deleting mRNA " + mrnaNode.getProperty("id") + " (" + mrnaNode.getId() + ") with all its children");
-                    }
+                    Pantools.logger.debug("Deleting mRNA {} ({}) with all its children", mrnaNode.getProperty("id"), mrnaNode.getId());
 
                     // delete all relationships of mRNA node and gather all child nodes
                     HashSet<Node> childNodes = new HashSet<>();
                     for (Relationship mrnaRel : mrnaNode.getRelationships(RelTypes.is_parent_of, Direction.OUTGOING)) {
-                        if (LOG) {
-                            System.out.println("Marking end node of relationship " + mrnaRel.toString() + " for deletion");
-                        }
+                        Pantools.logger.debug("Marking end node of relationship {} for deletion", mrnaRel.toString());
                         childNodes.add(mrnaRel.getEndNode());
                     }
 
                     // delete all child nodes
                     for (Node childNode : childNodes) {
-                        if (LOG) {
-                            System.out.println("Deleting child " + childNode.getProperty("id") + " (" + childNode.getId() + ")");
+                        Pantools.logger.debug("Deleting child {} ({})", childNode.getProperty("id"), childNode.getId());
+                        for (Relationship childRel : childNode.getRelationships()) {
+                            childRel.delete();
                         }
                         for (Relationship childRel : childNode.getRelationships()) {
                             childRel.delete();
@@ -290,9 +268,7 @@ public class CoFeatures {
                     mrnaNode.delete();
                 }
             } else {
-                if (LOG) {
-                    System.out.println("Co-feature " + coFeature.getProperty("id") + " (" + coFeature.getId() + ") has no children");
-                }
+                Pantools.logger.debug("Co-feature {} ({}) has no children", coFeature.getProperty("id"), coFeature.getId());
             }
 
             // delete the co-feature node
@@ -339,7 +315,6 @@ public class CoFeatures {
     public String toString() {
         return "CoFeatures{" +
                 "graphDb=" + graphDb +
-                ", LOG=" + LOG +
                 ", genomeNr=" + genomeNr +
                 ", co-features=" + coFeatures +
                 "}";
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/FunctionalAnnotations.java b/src/main/java/nl/wur/bif/pantools/pangenome/FunctionalAnnotations.java
index aa0dac81f8b80b10a4dada1d23c17877294e02c5..3c6b809041bd6a1141b90fe90b55c77638b84ecc 100644
--- a/src/main/java/nl/wur/bif/pantools/pangenome/FunctionalAnnotations.java
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/FunctionalAnnotations.java
@@ -21,6 +21,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
 
+import nl.wur.bif.pantools.pantools.Pantools;
 import org.apache.commons.lang.ArrayUtils;
 import org.neo4j.graphdb.Direction;
 import org.neo4j.graphdb.Label;
@@ -1540,7 +1541,7 @@ public class FunctionalAnnotations {
         for (Node hm_node : hm_nodes_list) {
             hm_counter ++;
             if (hm_counter % 100 == 0 || hm_counter == total_hmgroups || hm_counter < 100) {
-                System.out.print("\rGathering functions from homology groups: " + hm_counter + "/" + total_hmgroups );
+                Pantools.logger.info("Retrieving function information: {}/{} homology groups.", hm_counter, total_hmgroups);
             }
             HashMap<String, int[]> function_counter_map = new HashMap<>();
             // key is functional annotation (id or description). value is array with counts per genome
@@ -1597,8 +1598,8 @@ public class FunctionalAnnotations {
             if (Mode.contains("CLASSIFICATION")) {
                 hmgroup_class = hmgroup_class_map.get(hm_node);
                 if (hmgroup_class == null) {
-                    System.out.println("The 'homology_group' node " + hm_node + " was not part of the previous 'gene_classification' run\n" +
-                            "Make sure you have the same genome/sequence selection");
+                    Pantools.logger.error("The 'homology_group' node {} was not part of the previous 'gene_classification' run", hm_node);
+                    Pantools.logger.error("Make sure you have the same genome/sequence selection.");
                     System.exit(1);
                 }
             } else {
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/GenomeLayer.java b/src/main/java/nl/wur/bif/pantools/pangenome/GenomeLayer.java
index 8d6230c8d8632aab3918b0b1c3a9845ee27a285f..43dab18105fb4044ddb5b83540e87e622bd742c9 100755
--- a/src/main/java/nl/wur/bif/pantools/pangenome/GenomeLayer.java
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/GenomeLayer.java
@@ -5,53 +5,36 @@
  */
 package nl.wur.bif.pantools.pangenome;
 
-import nl.wur.bif.pantools.alignment.BoundedLocalSequenceAlignment;
-import nl.wur.bif.pantools.alignment.LocalSequenceAlignment;
-import htsjdk.samtools.SAMException;
-import htsjdk.samtools.SAMFileHeader;
-import htsjdk.samtools.SAMFileWriter;
-import htsjdk.samtools.SAMFileWriterFactory;
-import htsjdk.samtools.SAMProgramRecord;
-import htsjdk.samtools.SAMRecord;
-import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.*;
 import htsjdk.samtools.fastq.FastqReader;
 import htsjdk.samtools.fastq.FastqRecord;
+import nl.wur.bif.pantools.alignment.BoundedLocalSequenceAlignment;
+import nl.wur.bif.pantools.alignment.LocalSequenceAlignment;
 import nl.wur.bif.pantools.index.IndexDatabase;
-import nl.wur.bif.pantools.sequence.SequenceScanner;
 import nl.wur.bif.pantools.index.IndexPointer;
 import nl.wur.bif.pantools.index.IndexScanner;
 import nl.wur.bif.pantools.index.kmer;
+import nl.wur.bif.pantools.pangenome.parallel.LocalizeNodesParallel;
+import nl.wur.bif.pantools.sequence.SequenceScanner;
+import org.neo4j.graphdb.*;
 
 import java.io.*;
-import java.util.Arrays;
-import org.neo4j.graphdb.GraphDatabaseService;
-import org.neo4j.graphdb.Node;
-import org.neo4j.graphdb.Relationship;
-import org.neo4j.graphdb.RelationshipType;
-import org.neo4j.graphdb.ResourceIterator;
-import org.neo4j.graphdb.Transaction;
-import org.neo4j.graphdb.Direction;
+import java.nio.file.Files;
+import java.nio.file.NotDirectoryException;
+import java.nio.file.Path;
+import java.nio.file.attribute.FileAttribute;
 import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.Date;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.ListIterator;
-import java.util.NoSuchElementException;
-import java.util.PriorityQueue;
-import java.util.Queue;
-import java.util.Random;
+import java.util.*;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
+import java.util.stream.Stream;
 import java.util.zip.GZIPInputStream;
 
-import org.neo4j.graphdb.NotFoundException;
 import static nl.wur.bif.pantools.pangenome.Classification.genome_overview;
 import static nl.wur.bif.pantools.utils.Globals.*;
+import static nl.wur.bif.pantools.utils.Utils.complement;
 import static nl.wur.bif.pantools.utils.Utils.*;
 
 /**
@@ -60,6 +43,9 @@ import static nl.wur.bif.pantools.utils.Utils.*;
  * @author Siavash Sheikhizadeh, Eef Jonkheer, Bioinformatics group, Wageningen University, the Netherlands.
  */
 public class GenomeLayer {
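+    // File in the scratch directory to which add_sequence_properties spills nucleotide node IDs.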
+    private static final String NUCLEOTIDE_NODE_IDS_FILE_NAME = "nucleotide-node-ids.csv";
+    public final String SCRATCH_DIRECTORY_ENVIRONMENT_VARIABLE_NAME = "PANTOOLS_SCRATCH_DIRECTORY";
+    public final String DEFAULT_SCRATCH_DIRECTORY_PREFIX = "pantools-scratch-";
     private Node curr_node;
     private byte curr_side;
     private boolean finish;
@@ -1748,7 +1734,7 @@ public class GenomeLayer {
             OUTPUT_PATH += "read_mapping/";
             new File(WORKING_DIRECTORY + "read_mapping").mkdir();
         }
-        report_number_of_threads(true, true);
+        report_number_of_threads();
         int i, j, t, genome;
         Node pangenome_node;
         BufferedWriter out;
@@ -1805,6 +1791,7 @@ public class GenomeLayer {
         number_of_hits = new AtomicLong(0); 
         ArrayList<Integer>[] genome_numbers = retrieve_genomes_to_map_against();
         adj_total_genomes = genome_numbers[0].size();
+        System.err.println("OUT FORMAT: " + OUTFORMAT);
         if (OUTFORMAT.equals("BAM") || OUTFORMAT.equals("SAM")) {
             sams = new SAMFileWriter[genomeDb.num_genomes + 1 + extra];
             headers = new SAMFileHeader[genomeDb.num_genomes + 1 + extra];
@@ -1998,7 +1985,7 @@ public class GenomeLayer {
             for (int c = 0; in.ready();) {
                 line_counter ++; 
                 if (line_counter % 100 == 0 || line_counter < 10) {
-                    System.out.print("\rVerifying if all input files exist: " + line_counter);
+                    System.out.println("\rVerifying if all input files exist: " + line_counter);
                 }
                 String line = in.readLine().trim();
                 new_genomes_file.append(line).append("\n");
@@ -2122,12 +2109,15 @@ public class GenomeLayer {
      * Constructs a pangenome (gDBG) for a set of genomes.
      * build_pangenome()
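+     * @param databaseDirectory directory in which to create the pangenome database.
+     * @param scratchDirectory directory for intermediate files, or null to create a temporary directory.
+     * @param numBuckets number of buckets used by the parallel node localization.
+     * @param transactionSize maximum number of updates per database transaction during localization.
+     * @param numDbWriterThreads number of database writer threads used during localization.
+     * @param nodePropertiesCacheSize size of the node properties cache used during localization.
+     * @param keepIntermediateFiles whether to keep the intermediate files in the scratch directory.
+     * @throws IOException if an I/O error occurs while creating or cleaning up the scratch directory.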
      */
-    public void initialize_pangenome() {
+    public void initialize_pangenome(Path databaseDirectory, Path scratchDirectory, int numBuckets, int transactionSize,
+                                     int numDbWriterThreads, int nodePropertiesCacheSize, boolean keepIntermediateFiles)
+                                    throws IOException {
         System.out.println("\nConstructing the pangenome graph database\n");
         check_kmc_version(); // check if program is set to $PATH and if appropriate version
         Node pangenome_node;
         verify_if_all_genome_files_exist();
-        create_pangenome_database();
+        scratchDirectory = createScratchDirectory(scratchDirectory);
+        createPangenomeDatabase(databaseDirectory);
         indexSc = new IndexScanner(indexDb);
         K_SIZE = indexSc.get_K();
         System.out.println("\nk-size = "+K_SIZE);
@@ -2143,9 +2133,26 @@ public class GenomeLayer {
         num_bases = 0;
         num_degenerates = 0;
         construct_pangenome(pangenome_node);
-        add_sequence_properties();
-        localize_nodes();
-        
+
+        add_sequence_properties(scratchDirectory);
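+        // Localize nodes in parallel; intermediate files go to the scratch directory,
+        // which is removed in the finally block unless keepIntermediateFiles is set.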
+        try {
+            highest_frequency = new LocalizeNodesParallel(
+                scratchDirectory,
+                numBuckets,
+                transactionSize,
+                numDbWriterThreads,
+                nodePropertiesCacheSize,
+                keepIntermediateFiles
+            ).run(scratchDirectory);
+        } catch (Exception e) {
+            System.err.println("Error occurred during localization: " + e);
+            e.printStackTrace(System.err);
+            System.exit(1);
+        } finally {
+            if (!keepIntermediateFiles)
+                deleteDirectoryRecursively(scratchDirectory);
+        }
+
         try (Transaction tx = GRAPH_DB.beginTx()) {
             pangenome_node.setProperty("k_mer_size", K_SIZE);
             pangenome_node.setProperty("num_k_mers", indexSc.length());
@@ -2174,12 +2181,13 @@ public class GenomeLayer {
     /**
      * Adds new genomes to an available pangenome.
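+     * @param scratchDirectory directory for intermediate files, or null to create a temporary directory.
+     * @throws IOException if an I/O error occurs while creating or cleaning up the scratch directory.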
      */
-    public void add_genomes() {
+    public void add_genomes(Path scratchDirectory) throws IOException {
         System.out.println("\nAdding additional genomes to an already existing pangenome\n");
         check_if_program_exists_stdout("kmc -h", 100, "kmc"); // check if program is set to $PATH
         int previous_num_genomes;
         Node pangenome_node;
         verify_if_all_genome_files_exist();
+        scratchDirectory = createScratchDirectory(scratchDirectory);
         connect_pangenome();
         try (Transaction tx = GRAPH_DB.beginTx()) {
             pangenome_node = GRAPH_DB.findNodes(pangenome_label).next();
@@ -2207,10 +2215,11 @@ public class GenomeLayer {
     // the edge colors should be dropped out as they will change and localize_nodes() function will rebuild them again.
         drop_edges_colors();
         construct_pangenome(pangenome_node);
-        add_sequence_properties();
+        add_sequence_properties(scratchDirectory);
         localize_nodes();
         genome_overview();
-        
+
+        deleteDirectoryRecursively(scratchDirectory);
         System.out.println("\rNumber of kmers:   " + indexSc.length());
         System.out.println("Number of nodes:   " + num_nodes);
         System.out.println("Number of edges:   " + num_edges);
@@ -3312,47 +3321,47 @@ public class GenomeLayer {
      * Extracts the sequence of the nodes from the genome database and store it 
      * in the nodes.
      */
-    void add_sequence_properties() {
-        int trsc = 0, node_length;
-        int[] addr;
-        Node node;
-        ResourceIterator<Node> nodes_iterator;
-        LinkedList<Node> nodes = new LinkedList();
-        StringBuilder sequence = new StringBuilder();
-        System.out.println("Adding sequence to the nodes...");
-        try(Transaction tx = GRAPH_DB.beginTx()) {
-            nodes_iterator = GRAPH_DB.findNodes(nucleotide_label);
-            while (nodes_iterator.hasNext()) {
-                nodes.add(nodes_iterator.next());
+    void add_sequence_properties(Path scratchDirectory) throws IOException {
+        final long start = System.currentTimeMillis();
+        // TODO: store IDs more efficiently, or find another way to stream in IDs without caching the whole list
+        final Path nucleotideNodeIdsFile = scratchDirectory.resolve(NUCLEOTIDE_NODE_IDS_FILE_NAME);
+        long numNodes = 0;
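+        // First pass: stream the IDs of all nucleotide nodes to a scratch file so the
+        // full set of nodes never has to be held in memory at once.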
+        try (Transaction ignored = GRAPH_DB.beginTx();
+             BufferedWriter writer = Files.newBufferedWriter(nucleotideNodeIdsFile);
+             ResourceIterator<Node> itr = GRAPH_DB.findNodes(nucleotide_label)) {
+            while (itr.hasNext()) {
+                writer.write(Long.toString(itr.next().getId()));
+                writer.write('\n');
+                numNodes++;
             }
-            nodes_iterator.close();
-            tx.success();
         }
-        Transaction tx = GRAPH_DB.beginTx();
-        try {
-        //num_bases = K - 1; // for the missed overlapped of the last node of each sequence which will not be stored 
-            while (!nodes.isEmpty()) {
-                node = nodes.remove();
-                addr = (int[]) node.getProperty("address");
-                node_length = (int) node.getProperty("length");
-                num_bases += node_length;
-                sequence.setLength(0);
-                //num_bases += node_length - K + 1;
-                //node.setProperty("sequence", genomeDb.get_sequence(addr[0], addr[1], addr[2], node_length - K + 1, true).toString());
-                genomeSc.get_sub_sequence(sequence, addr[0], addr[1], addr[2], node_length, true);
-                node.setProperty("sequence", sequence.toString());
-                ++trsc;
-                if (trsc >= MAX_TRANSACTION_SIZE) {
+
+        // Read back node IDs, chunk them up into transactions and write the sequences
+
+        try (BufferedReader reader = Files.newBufferedReader(nucleotideNodeIdsFile)) {
+            for (long i = 0; i < numNodes; i += MAX_TRANSACTION_SIZE) {
+                // Read chunks of max. transaction size
+                try (Transaction tx = GRAPH_DB.beginTx()) {
+                    for (long j = 0; j < Math.min(MAX_TRANSACTION_SIZE, numNodes - i); j++) {
+                        final long id = Long.parseLong(reader.readLine());
+                        final Node node = GRAPH_DB.getNodeById(id);
+                        final int[] address = (int[]) node.getProperty("address");
+                        final int length = (int) node.getProperty("length");
+                        num_bases += length;
+                        final StringBuilder sequence = new StringBuilder();
+                        genomeSc.get_sub_sequence(sequence, address[0], address[1], address[2], length, true);
+                        node.setProperty("sequence", sequence.toString());
+                    }
+
                     tx.success();
-                    tx.close();
-                    tx = GRAPH_DB.beginTx();
-                    trsc = 0;
                 }
             }
-            tx.success();
-        } finally {
-            tx.close();
         }
+
+        // Clean up
+
+        Files.delete(nucleotideNodeIdsFile);
+        System.out.printf("Finished adding sequence properties phase in %,d ms.%n", System.currentTimeMillis() - start);
     }
 
     /**
@@ -3427,4 +3436,43 @@ public class GenomeLayer {
         }
         return size / 1048576 + 1;
     }
+
+    /**
+     * Create a scratch directory. If the given path is null, a new temporary directory is created via
+     * {@link Files#createTempDirectory(String, FileAttribute[])}. Otherwise, the directory is created if it does
+     * not exist yet; if it already exists and is not empty, a RuntimeException is thrown.
+     * @param directory path to the scratch directory, or null to create a temporary directory.
+     * @return path to the scratch directory.
+     */
+    public Path createScratchDirectory(Path directory) throws IOException {
+        if (directory == null)
+            return Files.createTempDirectory(DEFAULT_SCRATCH_DIRECTORY_PREFIX);
+        else {
+            // If a path to a scratch directory is given, make sure it is empty OR create it
+            if (Files.notExists(directory))
+                Files.createDirectory(directory);
+            else {
+                try (Stream<Path> files = Files.list(directory)) {
+                    if (files.findAny().isPresent())
+                        throw new RuntimeException("user-specified scratch directory " + directory + " should be empty");
+                } catch (NotDirectoryException ignored) {
+                    throw new RuntimeException("user-specified scratch directory " + directory + " is not a directory");
+                }
+            }
+        }
+        return directory;
+    }
+
+    /**
+     * Delete a directory and its contents.
+     * @param directory path to directory.
+     */
+    public void deleteDirectoryRecursively(Path directory) throws IOException {
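+        // Files.walk yields a parent before its children, so sort in reverse order to
+        // delete files and subdirectories before their containing directories.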
+        try (Stream<Path> paths = Files.walk(directory)) {
+            //noinspection ResultOfMethodCallIgnored
+            paths
+                .sorted(Comparator.reverseOrder())
+                .map(Path::toFile)
+                .forEach(File::delete);
+        }
+    }
 }
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/MultipleSequenceAlignment.java b/src/main/java/nl/wur/bif/pantools/pangenome/MultipleSequenceAlignment.java
index 49ec4c0f8d60d43c52cacae08acf2c8e82f7fd7c..2d45b7ac4485d69dedd008d0ea9e38d2bf839f72 100644
--- a/src/main/java/nl/wur/bif/pantools/pangenome/MultipleSequenceAlignment.java
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/MultipleSequenceAlignment.java
@@ -3,7 +3,9 @@ package nl.wur.bif.pantools.pangenome;
 import static nl.wur.bif.pantools.pangenome.create_skip_arrays.create_skip_arrays;
 
 import nl.wur.bif.pantools.index.IndexScanner;
+import nl.wur.bif.pantools.pantools.Pantools;
 import nl.wur.bif.pantools.sequence.SequenceScanner;
+import nl.wur.bif.pantools.utils.Utils;
 import org.neo4j.graphdb.*;
 
 import java.io.*;
@@ -17,12 +19,11 @@ import static nl.wur.bif.pantools.utils.Utils.*;
 
 public class MultipleSequenceAlignment {
     private String outDirMsa;
-    private final String method;
+    private final String msaMethod;
     private final boolean variation;
-    private final boolean progress;
     private boolean alignProtein;
     private boolean alignNucleotide;
-    private final List<Integer> hmGroups;
+    private final List<Long> hmGroups;
     private ArrayList<Node> hmNodeList;
     private ArrayList<String> msaNames;
 
@@ -33,19 +34,13 @@ public class MultipleSequenceAlignment {
 
     /**
      *
-     * @param method can be either: "per_group", "multiple_groups"
+     * @param msaMethod can be: "per_group", "multiple_groups", "functions", "regions"
      * @param variation whether to use variation
-     * @param progress show progress
      * @param hmGroups an array with homology groups
      */
-    public MultipleSequenceAlignment(String method, boolean variation, boolean progress, List<Integer> hmGroups) {
-        this.method = method;
+    public MultipleSequenceAlignment(String msaMethod, boolean variation, List<Long> hmGroups) {
+        this.msaMethod = msaMethod;
         this.variation = variation; //TODO: not yet implemented
-        if (LOG) {
-            this.progress = true;
-        } else {
-            this.progress = progress;
-        }
         this.hmGroups = hmGroups;
 
         prepareSequences();
@@ -55,9 +50,9 @@ public class MultipleSequenceAlignment {
      * Creates input files for multiple sequence alignment
      */
     private void prepareSequences() {
-        if (progress) {
-            System.out.println("\nMultiple sequence alignment for " + msaMethod + ".\n");
-            report_number_of_threads(true, true); // prints how many threads were selected by user
+        Pantools.logger.info("Multiple sequence alignment for {}.", msaMethod);
+        if (Pantools.logger.isDebugEnabled()) {
+            report_number_of_threads(); // prints how many threads were selected by user
         }
 
         check_if_graphDb_is_available(); // starts up the graph database if needed
@@ -80,13 +75,13 @@ public class MultipleSequenceAlignment {
 
             check_current_grouping_version(); // check which version of homology grouping is active
             if (grouping_version < 1) {
-                System.out.println("\nNo homology grouping is active\n");
+                Pantools.logger.error("No homology grouping is active.");
                 System.exit(1);
             }
 
             tx.success(); // transaction successful, commit changes
         } catch (NotFoundException nfe) {
-            System.out.println("Unable to start the pangenome database\n");
+            Pantools.logger.error("Unable to start the pangenome database.");
             System.exit(1);
         }
 
@@ -95,8 +90,8 @@ public class MultipleSequenceAlignment {
             connect_pangenome();
         }
 
-        if (PHENOTYPE == null && progress) {
-            System.out.println("No --phenotype was provided, unable to find phenotype specific SNPs\n");
+        if (PHENOTYPE == null) {
+            Pantools.logger.info("No --phenotype was provided, unable to find phenotype specific SNPs");
         }
 
         //prepare sequences //TODO create input should not be this amount of copying code, can be simplified
@@ -104,12 +99,12 @@ public class MultipleSequenceAlignment {
             switch(msaMethod) { //set outDir + create input files + set msaNames
                 case "per_group":
                     this.outDirMsa = WORKING_DIRECTORY + "alignments/msa_" + msaMethod + "/grouping_v" + grouping_version + "/";
-                    this.hmNodeList = findHmNodes(hmGroups);
+                    this.hmNodeList = Utils.findHmNodes(hmGroups, 1);
                     this.msaNames = createInputMsaPerGroup(false);
                     break;
                 case "multiple_groups":
                     this.outDirMsa = WORKING_DIRECTORY + "alignments/msa_" + msaMethod + "/grouping_v" + grouping_version + "/";
-                    this.hmNodeList = findHmNodes(hmGroups);
+                    this.hmNodeList = Utils.findHmNodes(hmGroups, 1);
                     this.msaNames = createInputMsaMultipleGroups();
                     break;
                 case "regions":
@@ -123,7 +118,7 @@ public class MultipleSequenceAlignment {
                     this.msaNames = createInputMsaFunctions();
                     break;
                 default:
-                    System.out.println("Did not recognise the following MSA method: " + msaMethod);
+                    Pantools.logger.error("Did not recognise the following MSA method: {}", msaMethod);
                     System.exit(1);
             }
 
@@ -140,41 +135,29 @@ public class MultipleSequenceAlignment {
      */
     private void checkModeForMsa() {
         if (FAST) { // using --mode fast was the old argument to skip tree constructions. New argument is --fast
-            if (progress) {
-                System.out.println("\r--fast was included: not creating any phylogenies.");
-            }
+            Pantools.logger.info("--fast was included: not creating any phylogenies.");
             FAST = true;
         }
 
         if (PROTEOME) {
             this.alignProtein = true;
             this.alignNucleotide = false;
-            if (progress) {
-                System.out.println("\rRunning against a proteome. Only aligning protein sequences.");
-            }
+            Pantools.logger.info("Running against a proteome. Only aligning protein sequences.");
 //        } else if (VARIANTS) { //TODO not the same as variants yet
-//            if (progress) {
-//                System.out.println("\rRunning with variants. Only aligning nucleotide sequences.");
-//            }
+//            Pantools.logger.info("Running with variants. Only aligning nucleotide sequences.");
 //            this.alignProtein = false;
 //            this.alignNucleotide = true;
         } else {
             if (Mode.contains("NUCLEOTIDE")) {
-                if (progress) {
-                    System.out.println("\rNucleotide mode selected. Only aligning nucleotide sequences.");
-                }
+                Pantools.logger.info("Nucleotide mode selected. Only aligning nucleotide sequences.");
                 this.alignProtein = false;
                 this.alignNucleotide = true;
             } else if (Mode.contains("PROTEIN")) {
-                if (progress) {
-                    System.out.println("\rProtein mode selected. Only aligning protein sequences.");
-                }
+                Pantools.logger.info("Protein mode selected. Only aligning protein sequences.");
                 this.alignProtein = true;
                 this.alignNucleotide = false;
             } else if (Mode.equals("0")) { // nothing was included
-                if (progress) {
-                    System.out.println("\rNo --mode protein or nucleotide was selected. Aligning both types of sequences, if applicable.");
-                }
+                Pantools.logger.info("No --mode protein or nucleotide was selected. Aligning both types of sequences, if applicable.");
                 this.alignProtein = true;
                 this.alignNucleotide = true;
             } else {
@@ -192,10 +175,10 @@ public class MultipleSequenceAlignment {
             String type = (String) GRAPH_DB.findNodes(accession_label).next().getProperty("type");
             assert type.equals("VCF");
         } catch (NotFoundException | NoSuchElementException nee) {
-            System.out.println("\nNo variants were found in this pangenome. Please run add_accessions first.\n");
+            Pantools.logger.error("No variants were found in this pangenome. Please run add_accessions first.");
             System.exit(1);
         } catch (AssertionError e) {
-            System.out.println("\nSomehow variants were found, but they appear to not to be in VCF format.\n");
+            Pantools.logger.error("Somehow variants were found, but they appear to not to be in VCF format.");
             System.exit(1);
         }
     }
@@ -249,18 +232,16 @@ public class MultipleSequenceAlignment {
         ArrayList<String> updatedHmNodeList = new ArrayList<>(); // exclude homology groups with only 1 sequence
         long totalHmgroups = hmNodeList.size();
         int groupCounter = 0, notUniqueCounter = 0;
-        if (progress) {
-            System.out.println();
-        }
         int printThreshold = 100;
         if (totalHmgroups < 500) {
             printThreshold = 10;
         }
 
+        Pantools.logger.info("Preparing input files for {} homology groups.", totalHmgroups);
         for (Node hmNode : hmNodeList) {
             groupCounter++;
-            if ((groupCounter % printThreshold == 0 || groupCounter == totalHmgroups || groupCounter < 50) && progress) {
-                System.out.print("\rCreating input sequences: " + groupCounter + "/" + totalHmgroups);
+            if (groupCounter % printThreshold == 0 || groupCounter == totalHmgroups || groupCounter < 50) {
+                Pantools.logger.debug("Creating input sequences: {} / {}", groupCounter, totalHmgroups);
             }
 
             checkIfHmIsCorrectGrouping(hmNode);
@@ -272,9 +253,7 @@ public class MultipleSequenceAlignment {
             new File(outputPath).mkdirs(); // create directory
 
             if (!checkIfSkippingIsCorrect(inputPath)) {
-                if (LOG) {
-                    System.out.println("Skipping is incorrect, removing old files.");
-                }
+                Pantools.logger.info("Skipping is incorrect, removing old files.");
                 delete_directory(inputPath);
                 delete_directory(outputPath);
                 new File(inputPath).mkdirs(); // create directory
@@ -310,14 +289,13 @@ public class MultipleSequenceAlignment {
             write_string_to_file_full_path("all sequences were extracted", outDirMsa + "extract_sequences_done");
         }
 
-        if (progress) {
-            if (updatedHmNodeList.size() < hmNodeList.size()) {
-                System.out.println("\rNumber of selected groups lowered from " + hmNodeList.size() + " to " + updatedHmNodeList.size() +
-                        " because of only 1 sequence in group");
-            } else {
-                System.out.println();
-            }
+        if (hmNodeList.size() > updatedHmNodeList.size()) {
+            Pantools.logger.info("Lowered the number of homology groups from {} to {} because some groups only had 1 sequence.", hmNodeList.size(), updatedHmNodeList.size());
         }
+
+        Pantools.logger.info("Finished preparing input files for {} homology groups.", updatedHmNodeList.size());
+        Pantools.logger.debug("Homology group input files created for: {}.", updatedHmNodeList);
+
         return updatedHmNodeList;
     }
 
@@ -335,9 +313,6 @@ public class MultipleSequenceAlignment {
         }
         String hmString = String.join("_", hmStringList);
         outputHmStringList.add(hmString);
-        if (progress) {
-            System.out.println();
-        }
 
         String path = outDirMsa + hmString;
         String inputPath = path + "/input/";
@@ -346,9 +321,7 @@ public class MultipleSequenceAlignment {
         new File(outputPath).mkdirs(); // create directory
 
         if (!checkIfSkippingIsCorrect(inputPath)) {
-            if (LOG) {
-                System.out.println("Skipping is incorrect, removing old files.");
-            }
+            Pantools.logger.info("Skipping is incorrect, removing old files.");
             delete_directory(inputPath);
             delete_directory(outputPath);
             new File(inputPath).mkdirs(); // create directory
@@ -378,11 +351,11 @@ public class MultipleSequenceAlignment {
      */
     private ArrayList<String> createInputMsaRegions() {
         if (target_genome != null || skip_genomes != null) { //stop if --ref or --skip is used
-            System.out.println("'--method regions' is unable to use --reference/-ref or --skip. You should exclude these genomes from your --regions-file\n");
+            Pantools.logger.error("'--method regions' is unable to use --reference/-ref or --skip. You should exclude these genomes from your --regions-file");
             System.exit(1);
         }
         if (alignProtein && !alignNucleotide) { //stop if only protein should be aligned
-            System.out.println("Cannot run MSA for regions with protein sequence.");
+            Pantools.logger.error("Cannot run MSA for regions with protein sequence.");
             System.exit(1);
         }
 
@@ -416,7 +389,7 @@ public class MultipleSequenceAlignment {
      */
     private ArrayList<String> createInputMsaFunctions() {
         if (SELECTED_NAME == null) { //TODO use all functions
-            System.out.println("No functions selected. Please use --gene-names with a comma-separated list of function names.");
+            Pantools.logger.error("No functions selected. Please use --gene-names with a comma-separated list of function names.");
             System.exit(1);
         }
 
@@ -425,16 +398,10 @@ public class MultipleSequenceAlignment {
 
         long totalFunctions = functionNodeList.size();
         int functionCounter = 0;
-        int printThreshold = 100;
-        if (totalFunctions < 500) {
-            printThreshold = 10;
-        }
 
         for (Node functionNode : functionNodeList) {
             functionCounter++;
-            if ((functionCounter % printThreshold == 0 || functionCounter == totalFunctions || functionCounter < 50) && progress) {
-                System.out.print("\rCreating input sequences: " + functionCounter + "/" + totalFunctions);
-            }
+            Pantools.logger.info("Creating input sequences: {} / {}", functionCounter, totalFunctions);
 
             String function = (String) functionNode.getProperty("id");
             function = function.replace(":", "");
@@ -446,9 +413,7 @@ public class MultipleSequenceAlignment {
             new File(outputPath).mkdirs(); // create directory
 
             if (!checkIfSkippingIsCorrect(inputPath)) {
-                if (LOG) {
-                    System.out.println("Skipping is incorrect, removing old files.");
-                }
+                Pantools.logger.info("Skipping is incorrect, removing old files.");
                 delete_directory(inputPath);
                 delete_directory(outputPath);
                 new File(inputPath).mkdirs(); // create directory
@@ -474,40 +439,6 @@ public class MultipleSequenceAlignment {
         return outputFunctionsStringList;
     }
 
-    /**
-     * Get all currently active homology groups
-     * @param hmGroups a list of homology group ids (in case of null, all homology groups are retrieved)
-     * @return arraylist of homology group nodes that have more than one member
-     */
-    private ArrayList<Node> findHmNodes(List<Integer> hmGroups) {
-        ArrayList<Node> hmList = new ArrayList<>();
-        if (progress) {
-            System.out.print("\rNo homology groups were provided via --homology-groups or -homology-file, using all ");
-        }
-        if (hmGroups == null) {
-            try (ResourceIterator<Node> hmNodes = GRAPH_DB.findNodes(homology_group_label)) {
-                while (hmNodes.hasNext()) {
-                    Node hmNode = hmNodes.next();
-                    int numMembers = (int) hmNode.getProperty("num_members");
-                    if (numMembers > 1) {
-                        hmList.add(hmNode);
-                    }
-                }
-            } catch (Exception e) {
-                System.out.println("Error while retrieving homology groups");
-                System.exit(1);
-            }
-        } else {
-            for (int hmGroup : hmGroups) {
-                Node hmNode = GRAPH_DB.getNodeById(hmGroup);
-                test_if_correct_label(hmNode, homology_group_label, true);
-                hmList.add(hmNode);
-            }
-        }
-
-        return hmList;
-    }
-
     /**
      * Check if node has 'homology_group' label
      * @param hmNode node to be checked
@@ -519,11 +450,13 @@ public class MultipleSequenceAlignment {
         if (!correct) {
             correct = test_if_correct_label(hmNode, inactive_homology_group_label, false);
             if (!correct) {
-                System.out.println(hmNode + " is not an homology group");
+                Pantools.logger.error("Node {} is not a homology group.", hmNode);
             } else {
                 int groupVersion = (int) hmNode.getProperty("group_version");
-                System.out.println("\n" + hmNode + " is an inactive homology group! Grouping " + grouping_version +
-                        " is active while this homology group is version " + groupVersion);
+                Pantools.logger.error("Node {} is an inactive homology group! Grouping {} is active while this homology group is version {}.",
+                        hmNode,
+                        grouping_version,
+                        groupVersion);
             }
             System.exit(1);
         }
@@ -628,7 +561,7 @@ public class MultipleSequenceAlignment {
 
             if (variation) {
                 //get nucleotide sequences of accessions
-                System.out.println("Not yet implemented");
+                Pantools.logger.error("Not yet implemented.");
                 System.exit(1);
             }
 
@@ -711,9 +644,7 @@ public class MultipleSequenceAlignment {
                     int copyNr = genomeCounts[genomeNr-1];
                     totalSequences ++;
 
-                    if (progress) {
-                        System.out.print("\rGathering sequences: " + totalGroups + "/" + hmNodeList.size() + " homology groups. " + totalSequences + " sequences");
-                    }
+                    Pantools.logger.info("Gathering sequences: {} / {} homology groups. {} sequences", totalGroups, hmNodeList.size(), totalSequences);
 
                     if (proteinId.contains(":") || proteinId.contains(";") || proteinId.contains("|")) { // not allowed in newick format
                         proteinId = proteinId
@@ -761,9 +692,6 @@ public class MultipleSequenceAlignment {
             }
             tx.success(); // transaction successful, commit changes
         }
-        if (progress) {
-            System.out.println();
-        }
         new File(inputPath + "similarity_identity").mkdirs(); // create directory
         new File(inputPath + "var_inf_positions").mkdirs(); // create directory
 
@@ -805,15 +733,10 @@ public class MultipleSequenceAlignment {
 
         long totalRegions = regionsToSearch.length;
         int regionCounter = 0;
-        int printThreshold = 100;
-        if (totalRegions < 500) {
-            printThreshold = 10;
-        }
+
         for (String regionStr : regionsToSearch) {
             regionCounter++;
-            if ((regionCounter % printThreshold == 0 || regionCounter == totalRegions || regionCounter < 50) && progress) {
-                System.out.print("\rCreating input sequences: " + regionCounter + "/" + totalRegions);
-            }
+            Pantools.logger.info("Creating input sequences: {} / {}", regionCounter, totalRegions);
 
             String[] regionArray = regionStr.split(" ");
             int genomeNr;
@@ -826,7 +749,7 @@ public class MultipleSequenceAlignment {
                 startPos = Integer.parseInt(regionArray[2]);
                 endPos = Integer.parseInt(regionArray[3]);
             } catch (NumberFormatException nfe) {
-                System.out.println("Unable to correctly retrieve four numbers in: " + regionStr);
+                Pantools.logger.warn("Unable to correctly retrieve four numbers in: {}", regionStr);
                 continue;
             }
 
@@ -839,11 +762,11 @@ public class MultipleSequenceAlignment {
             }
 
             if (genomeNr > total_genomes) {
-                System.out.println("A genome with number " + genomeNr + " does not exists! Only " + total_genomes + " genomes in pangenome\n");
+                Pantools.logger.error("A genome with number {} does not exists! Only {} genomes in pangenome", genomeNr, total_genomes);
                 System.exit(1);
             }
             if (endPos < startPos) {
-                System.out.println("The start position is lower as the end position. " + regionStr + " (for reverse complement, please use a '-')\n");
+                Pantools.logger.error("The start position is lower as the end position. {} (for reverse complement, please use a '-')", regionStr);
                 System.exit(1);
             }
 
@@ -859,13 +782,10 @@ public class MultipleSequenceAlignment {
 
             genomeNrsBuilder.append(genomeNr).append(",").append(regionStr).append(rev_complement).append("\n");
             if (region.length() < 10) {
-                System.out.println("The length of " + regionStr + " is below 10 characters.");
+                Pantools.logger.warn("The length of {} is below 10 characters.", regionStr);
                 System.exit(1);
             }
         }
-        if (progress) {
-            System.out.println();
-        }
 
         write_string_to_file_full_path(fastaBuilder.toString(), inputPath + "/nuc.fasta");
         write_string_to_file_full_path(genomeNrsBuilder.toString(), inputPath + "/genome_order.info");
@@ -904,7 +824,7 @@ public class MultipleSequenceAlignment {
                     allRelations = functionNode.getRelationships(RelTypes.has_tigrfam);
                     break;
                 default:
-                    System.out.println(functionNode + " is not a function node.");
+                    Pantools.logger.error("{} is not a function node.", functionNode);
                     System.exit(1);
             }
         }
@@ -978,7 +898,7 @@ public class MultipleSequenceAlignment {
 
             if (variation) {
                 //get nucleotide sequences of accessions
-                System.out.println("Not yet implemented");
+                Pantools.logger.error("Not yet implemented.");
                 System.exit(1);
             }
 
@@ -1031,7 +951,7 @@ public class MultipleSequenceAlignment {
                 }
                 break;
             default:
-                System.out.println("Only CDS or protein sequence can currently be retrieved");
+                Pantools.logger.error("Only CDS or protein sequence can currently be retrieved.");
                 System.exit(1);
         }
 
@@ -1049,7 +969,7 @@ public class MultipleSequenceAlignment {
      */
     private static String getRegion(int genome, int sequence, int begin, int end, boolean forward) {
         if (begin > end) {
-            System.out.println("\nStart location of this region is higher than its end location -> " + genome + " " + sequence + " " + begin + " " +end );
+            Pantools.logger.error("Start location of this region is higher than its end location -> {} {} {} {}.", genome, sequence, begin, end);
             System.exit(1);
         }
         StringBuilder seq = new StringBuilder();
@@ -1200,9 +1120,7 @@ public class MultipleSequenceAlignment {
         boolean pass = false; // pass when all alignment (& tree) files exists
         int loopCounter = 0;
         while (!pass) {
-            if (LOG) {
-                System.out.println("Checking if MSA is complete for: " + msaNames);
-            }
+            Pantools.logger.debug("Checking if MSA is complete for: {}", msaNames);
             ArrayList<String> toBeAlignedList = checkIfMsaIsComplete(msaNames, !trim);
             if (toBeAlignedList.isEmpty()) {
                 pass = true;
@@ -1214,17 +1132,9 @@ public class MultipleSequenceAlignment {
                 runParallelFasttree(msaNames, !trim);
             }
 
-
-            if (progress) {
-                System.out.println();
-            }
             loopCounter ++;
             if (loopCounter == 3) {
-                System.out.print("\nFailed to align " + toBeAlignedList.size() + " groups: ");
-                for (String groupNode : toBeAlignedList) {
-                    System.out.print(groupNode + ", ");
-                }
-                System.out.println("");
+                Pantools.logger.warn("Failed to align {} groups: {}", toBeAlignedList.size(), toBeAlignedList);
                 System.exit(1);
             }
         }
@@ -1241,17 +1151,12 @@ public class MultipleSequenceAlignment {
                 }
             } catch (ArrayIndexOutOfBoundsException e) {
                 e.printStackTrace();
-                System.out.println("This error is likely caused by skipping different genomes for this run compared to a previous run.");
+                Pantools.logger.error("This error is likely caused by skipping different genomes for this run compared to a previous run.");
                 System.exit(1);
             }
-            if (progress) {
-                printResultsMsa();
-            }
+            printResultsMsa();
             Classification.include_identity_similarity_to_relations(msaNames, trim);
         }
-        if (progress) {
-            System.out.println();
-        }
     }
 
     /**
@@ -1294,14 +1199,9 @@ public class MultipleSequenceAlignment {
                 doneCount ++;
             }
 
-            if (((notDoneCount + doneCount) % 100 == 0 || (notDoneCount + doneCount) == hmNodeList.size()) && progress) {
-                System.out.print("\r Checking MSA completeness -> " + notDoneCount + " not done, " + doneCount + " done ");
-            }
+            Pantools.logger.debug("Checking MSA completeness -> {} not done, {} done", notDoneCount, doneCount);
         }
 
-        if (progress) {
-            System.out.print("\r                                                                            "); // spaces are intentional
-        }
         return toBeAlignedList;
     }
 
@@ -1312,6 +1212,8 @@ public class MultipleSequenceAlignment {
      * @param trim whether input sequences are trimmed or not
      */
     private void runParallelMafft(ArrayList<String> hmNodeList, boolean trim) {
+        Pantools.logger.info("Aligning {} sequences.", hmNodeList.size());
+
         String dot = ".";
         if (trim) {
             dot = dot.replace(".","_trimmed.");
@@ -1353,18 +1255,15 @@ public class MultipleSequenceAlignment {
         for (Future<?> future : futures) {
             try {
                 future.get();
-                if (progress) {
-                    System.out.print("\rAligning sequences: " + counter + "/" + total);
-                }
+                Pantools.logger.debug("Aligning sequences: {} / {}", counter, total);
                 counter++;
             } catch (InterruptedException | ExecutionException e) {
                 e.printStackTrace();
             }
         }
         es.shutdownNow(); //shutdown ExecutorService
-        if (progress) {
-            System.out.println();
-        }
+
+        Pantools.logger.info("Finished aligning {} sequences.", counter - 1);
     }
 
     /**
@@ -1373,6 +1272,8 @@ public class MultipleSequenceAlignment {
      * @param trim whether input is trimmed or not
      */
     private void runParallelFasttree(ArrayList<String> hmNodeList, boolean trim) {
+        Pantools.logger.info("Building trees for {} multiple sequence alignments.", hmNodeList.size());
+
         String dot = ".";
         if (trim) {
             dot = dot.replace(".","_trimmed.");
@@ -1413,18 +1314,15 @@ public class MultipleSequenceAlignment {
         for (Future<?> future : futures) {
             try {
                 future.get();
-                if (progress) {
-                    System.out.print("\rPhylogeny inference: " + counter + "/" + total);
-                }
+                Pantools.logger.debug("Phylogeny inference: {} / {}", counter, total);
                 counter++;
             } catch (InterruptedException | ExecutionException e) {
                 e.printStackTrace();
             }
         }
         es.shutdownNow(); //shutdown ExecutorService
-        if (progress) {
-            System.out.println();
-        }
+
+        Pantools.logger.info("Finished building trees for {} multiple sequence alignments.", counter - 1);
     }
 
     /**
@@ -1451,7 +1349,7 @@ public class MultipleSequenceAlignment {
                 }
             }
         } catch (IOException ioe) {
-            System.out.println("\nUnable to read the input fasta : " + inputFasta + "\n");
+            Pantools.logger.error("Unable to read the input fasta: {}", inputFasta);
             System.exit(1);
         }
 
@@ -1462,7 +1360,7 @@ public class MultipleSequenceAlignment {
                 lineCounter ++;
                 if (lineCounter == 1) {
                     if (!line.startsWith("CLUSTAL")) {
-                        System.out.println(inputAfa);
+                        Pantools.logger.error("File is not in afa (CLUSTAL) format: {}.", inputAfa);
                         System.exit(1);
                     }
                 }
@@ -1483,7 +1381,7 @@ public class MultipleSequenceAlignment {
                 sequenceMap.put(seqCounter, seqBuilder);
             }
         } catch (IOException ioe) {
-            System.out.println("\nUnable to read the input alignment: " + inputAfa + "\n");
+            Pantools.logger.error("Unable to read the input alignment: {}.", inputAfa);
             System.exit(1);
         }
 
@@ -1494,7 +1392,7 @@ public class MultipleSequenceAlignment {
                 out1.write(">" + sequenceList.get(i-1) + "\n" +value.toString());
             }
         } catch (IOException ioe) {
-            System.out.println("\nUnable to read: " + outputFile + "\n");
+            Pantools.logger.error("Unable to read: {}.", outputFile);
             System.exit(1);
         }
     }
@@ -1508,6 +1406,8 @@ public class MultipleSequenceAlignment {
      * NB: Should be correct for both nucleotide and protein now
      */
     private void trimPrealignedProteins() {
+        Pantools.logger.info("Trimming pre-aligned proteins.");
+
         int groupCounter = 0;
         ArrayList<String> suitableGroups = new ArrayList<>();
         ArrayList<String> excludedGroups = new ArrayList<>();
@@ -1516,9 +1416,7 @@ public class MultipleSequenceAlignment {
             String path = outDirMsa + groupId + "/";
             String inPath = path + "input/";
             String outPath = path + "output/" ;
-            if ((groupCounter % 10 == 0 || groupCounter == msaNames.size()) && progress) {
-                System.out.print("\rCreating trimmed sequences: " + groupCounter + "/" + msaNames.size());
-            }
+            Pantools.logger.debug("Creating trimmed sequences: {} / {}", groupCounter, msaNames.size());
             boolean pass = checkIfGroupIsTrimmed(groupId, inPath, suitableGroups, excludedGroups);
             if (pass) { // group is already trimmed
                 continue;
@@ -1555,20 +1453,17 @@ public class MultipleSequenceAlignment {
         if (excludedGroups.size() > 0) { // one ore multiple groups were excluded due to trimmed edges overlapping
             write_string_to_file_full_path(excludedGroups.toString().replace("[","").replace("]","").replace(" ",""),
                     outDirMsa + "/groups_excluded_based_on_trimming.txt");
-            if (progress) {
-                System.out.println("\rCreating trimmed sequences: " + groupCounter + "/" + msaNames.size()
-                        + "; Excluded " + excludedGroups.size() + " alignments. "
-                        + "See " + outDirMsa + "/groups_excluded_based_on_trimming.txt");
-            }
-        } else if (progress) {
-            System.out.println("\rCreating trimmed sequences: " + groupCounter + "/" + msaNames.size() + "; Excluded 0 alignments");
+            Pantools.logger.info("Groups excluded based on trimming: {}; see {} for more information.",
+                    excludedGroups.toString(),
+                    outDirMsa + "/groups_excluded_based_on_trimming.txt");
+            Pantools.logger.debug("Groups excluded based on trimming: {}", excludedGroups.toString());
+        } else {
+            Pantools.logger.info("No groups excluded based on trimming.");
         }
 
         if (suitableGroups.isEmpty()) {
-            if (progress) {
-                System.out.println("\n\nNone of the trimmed alignments can be used for the second alignment round. "
-                        + "Use '--mode no-trimming' to further analyze them\n");
-            }
+            Pantools.logger.warn("None of the trimmed alignments can be used for the second alignment round. "
+                    + "Use '--mode no-trimming' to further analyze them");
         } else {
             // msaNames is a global variable and is read by this function again after restarting
             this.msaNames = suitableGroups;
@@ -1701,7 +1596,7 @@ public class MultipleSequenceAlignment {
             seqTrimMap.put(prevSeq, startEndGapArray);
             sequenceMap.put(prevSeq, sequence);
         } catch (IOException e) {
-            System.out.println(e.getMessage());
+            Pantools.logger.error(e.getMessage());
             System.exit(1);
         }
         return alignmentLength;
@@ -1756,7 +1651,6 @@ public class MultipleSequenceAlignment {
                 }
 
                 if (line.contains(">")) {
-                    //System.out.println("prev seq " + prevSeq + "." );
                     if (prevSeq.length() > 1) {
                         int[] trimArray = seqTrimMap.get(prevSeq);
                         int trimSum = trimArray[0] + trimArray[1];
@@ -1787,7 +1681,7 @@ public class MultipleSequenceAlignment {
             outputBuilder.append(prevSeq).append("\n");
             trimSequence(sequence, trimArray, outputBuilder, nucleotide);
         } catch (IOException e) {
-            System.out.println("\n" + e.getMessage());
+            Pantools.logger.error(e.getMessage());
             System.exit(1);
         }
 
@@ -1816,7 +1710,6 @@ public class MultipleSequenceAlignment {
             removeNucsEnd = removeNucsEnd*3;
         }
         removeNucsEnd = sequence.length() - removeNucsEnd; // adjust the ending with the number of nucleotides
-        //System.out.println(removeNucsStart + " " + removeNucsEnd);
         String trimmedSeq = "";
         for (int i = 0; i < sequence.length(); i++) {
             char aa = sequence.charAt(i);
@@ -1869,20 +1762,19 @@ public class MultipleSequenceAlignment {
      * Prints MSA results
      */
     private void printResultsMsa() {
-        System.out.println("\nOutput written to:\n" + outDirMsa + "\n");
+        Pantools.logger.info("Output written to: {}", outDirMsa);
         if (PHENOTYPE == null) {
-            System.out.println("Provide a phenotype to find phenotype specific variants! (--phenotype)");
+            Pantools.logger.info("Provide a phenotype to find phenotype specific variants! (--phenotype)");
         } else {
-            System.out.println("\n\nPhenotype output written to:\n"
-                    + " " + outDirMsa + "groups_with_phenotype_specific_changes.txt");
+            Pantools.logger.info("Phenotype output written to:");
 
+            Pantools.logger.info(" {}", outDirMsa + "groups_with_phenotype_specific_changes.txt");
             if (alignNucleotide) { // nucleotide sequences
-                System.out.print(" " + outDirMsa + "phenotype_specific_changes_nuc.txt\n");
+                Pantools.logger.info(" {}", outDirMsa + "phenotype_specific_changes_nuc.txt");
             }
             if (alignProtein) { // protein sequences
-                System.out.print(" " + outDirMsa + "phenotype_specific_changes_prot.txt\n");
+                Pantools.logger.info(" {}", outDirMsa + "phenotype_specific_changes_prot.txt");
             }
-            System.out.println();
         }
     }
 
@@ -1939,9 +1831,7 @@ public class MultipleSequenceAlignment {
                     " 1> " +
                     output;
 
-            if (LOG) {
-                System.out.println(mafftCommand);
-            }
+            Pantools.logger.debug("MAFFT command: {}", mafftCommand);
 
             runCommand(mafftCommand); //run MAFFT
             convertAfaToAlignedFasta(input, output); //convert output of MAFFT to fasta format
@@ -1990,13 +1880,11 @@ public class MultipleSequenceAlignment {
                     " 1> " + output +
                     " 2> " + log;
 
-            if (LOG) {
-                System.out.println(fasttreeCommand);
-            }
+            Pantools.logger.debug("FastTree command: {}", fasttreeCommand);
 
             runCommand(fasttreeCommand); //run FastTree
 
-            if (!LOG) {
+            if (!Pantools.logger.isDebugEnabled()) {
                 delete_file_full_path(log);
             }
 
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/Phylogeny.java b/src/main/java/nl/wur/bif/pantools/pangenome/Phylogeny.java
index 6f37a894f6a8bec20d4156c3d641d9a651f3cdcb..a1f9cdd40682c38d9b4ce71284354c6c38d7b1a9 100644
--- a/src/main/java/nl/wur/bif/pantools/pangenome/Phylogeny.java
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/Phylogeny.java
@@ -6,13 +6,11 @@
 package nl.wur.bif.pantools.pangenome;
 
 import nl.wur.bif.pantools.index.IndexScanner;
+import nl.wur.bif.pantools.pantools.Pantools;
 import nl.wur.bif.pantools.sequence.SequenceScanner;
 import nl.wur.bif.pantools.utils.Utils;
 import org.apache.commons.lang.ArrayUtils;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
 import org.neo4j.graphdb.*;
-import scala.Int;
 
 import java.io.*;
 import java.nio.file.Paths;
@@ -35,7 +33,6 @@ import static nl.wur.bif.pantools.utils.Utils.*;
  */
 public class Phylogeny {
 
-    Logger phylogenyLogger;
     private BlockingQueue<String> string_queue;
     private static AtomicLong atomic_counter1;
     private static AtomicLong atomic_counter2;
@@ -46,10 +43,6 @@ public class Phylogeny {
     final static public String[] COLORBLIND_CODES = new String[]{"#E69F00", "#56B4E9", "#009E73", "#0072B2", "#D55E00",
             "#CC79A7", "#999999", "#F0E442", "#000000"}; // colourblind friendly palette up to 8 colors (9th value is black)
 
-    public Phylogeny() {
-        this.phylogenyLogger = LogManager.getLogger(Phylogeny.class);
-    }
-
     /**
      * Function for mlsa_find_genes() and find_genes_by_name()
      * Requires
@@ -290,7 +283,7 @@ public class Phylogeny {
         if (PHENOTYPE == null) {
             System.out.println("Important! No --phenotype was provided, only genome numbers will be shown in the final phylogeny\n");
         }
-        report_number_of_threads(true, true); // prints how many threads were selected by user
+        report_number_of_threads(); // prints how many threads were selected by user
         check_if_graphDb_is_available(); // starts up the graph database if needed
         try (Transaction tx = GRAPH_DB.beginTx()) { // start database transaction
             Node pangenome_node = GRAPH_DB.findNodes(pangenome_label).next();
@@ -318,7 +311,7 @@ public class Phylogeny {
         create_directory_in_DB("mlsa/output/var_inf_positions");
         System.out.println("\nMLSA Step 3/3. Multiple sequence alignment of concatenated sequences\n");
         check_if_graphDb_is_available(); // starts up the graph database if needed
-        String threads_str = report_number_of_threads(true, true); // prints how many threads were selected by user
+        String threads_str = report_number_of_threads(); // prints how many threads were selected by user
         try (Transaction tx = GRAPH_DB.beginTx()) { // start database transaction
             Node pangenome_node = GRAPH_DB.findNodes(pangenome_label).next();
             stop_if_panproteome(pangenome_node, "MLSA"); // stops the program when run on a panproteome, retrieves K_size & total_genomes
@@ -1494,7 +1487,7 @@ public class Phylogeny {
             System.out.println("\nCalculate ANI scores using fastANI\n");
             program = "fastANI";
             check_if_program_exists_stderr("fastANI -h", 100, "fastANI", true); // check if program is set to $PATH
-            report_number_of_threads(true, true); // prints how many threads were selected by user
+            report_number_of_threads(); // prints how many threads were selected by user
         } else {
             System.out.println("Mode not recognized");
             System.exit(1);
@@ -2241,7 +2234,7 @@ public class Phylogeny {
      * @param hmGroups an array of the selected hmGroups (if null, only the single copy orthologs are used)
      * @throws IOException if the single copy orthologs file cannot be read
      */
-    public void core_snp_tree(List<Integer> hmGroups) throws IOException {
+    public void core_snp_tree(List<Long> hmGroups) throws IOException {
         System.out.println("\nInferring core phylogeny from SNPs or Amino acid substitutions identified in single-copy-orthologous groups\n");
         check_if_graphDb_is_available(); // starts up the graph database if needed
         FAST = true; // skip the fastTree phylogeny inference per homology group
@@ -2276,7 +2269,6 @@ public class Phylogeny {
         MultipleSequenceAlignment msa = new MultipleSequenceAlignment(
                 msaMethod,
                 false,
-                true,
                 hmGroups
         );
         System.out.println();
@@ -2566,7 +2558,7 @@ public class Phylogeny {
      * contain polytomies. So we will need to wait until they update ASTRAL-Pro to accept polytomies before updating to
      * a more recent version of their software.
      */
-    public void consensus_tree(List<Integer> hmGroups) {
+    public void consensus_tree(List<Long> hmGroups) {
         System.out.println("\nBuilding phylogeny using user-specified groups\n"
                 + "Using ASTRAL-PRO (https://github.com/chaoszhang/ASTER v1.3) for this.\n\n"
                 + "Please cite the original authors:\n"
@@ -2619,7 +2611,6 @@ public class Phylogeny {
         MultipleSequenceAlignment msa = new MultipleSequenceAlignment(
                 "per_group",
                 false,
-                true,
                 hmGroups
         );
         msa.alignSequences(true, false);
@@ -2663,7 +2654,7 @@ public class Phylogeny {
             args = new String[]{"astral-pro",  "-o", species_tree_file, all_gene_tree_file};
         }
         System.out.println("\rRunning ASTRAL-PRO for all alignments. (Database is unlocked and can be used by other functions.)");
-        if (phylogenyLogger.isDebugEnabled()) {
+        if (Pantools.logger.isDebugEnabled()) {
             System.out.println("ASTRAL-PRO command: " + Arrays.toString(args));
         }
         ExecCommand.ExecCommand(args);
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/ProteomeLayer.java b/src/main/java/nl/wur/bif/pantools/pangenome/ProteomeLayer.java
index 5668473d4d35a8e14a8874d66178b065778e3ec0..03be9e7a583bfc63093bb620fc9b972821f8759e 100644
--- a/src/main/java/nl/wur/bif/pantools/pangenome/ProteomeLayer.java
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/ProteomeLayer.java
@@ -6,51 +6,20 @@
 package nl.wur.bif.pantools.pangenome;
 
 import nl.wur.bif.pantools.alignment.LocalSequenceAlignment;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.LinkedList;
-import java.util.ListIterator;
-import java.util.Queue;
+import org.neo4j.graphdb.*;
+
+import java.io.*;
+import java.nio.file.Path;
 import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.TreeSet;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.BlockingQueue;
-import java.util.concurrent.LinkedBlockingQueue;
+import java.util.*;
+import java.util.concurrent.*;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.logging.Level;
 import java.util.logging.Logger;
-import org.neo4j.graphdb.Direction;
-import org.neo4j.graphdb.Node;
-import org.neo4j.graphdb.NotFoundException;
-import org.neo4j.graphdb.Relationship;
-import org.neo4j.graphdb.ResourceIterator;
-import org.neo4j.graphdb.Transaction;
-import static nl.wur.bif.pantools.pangenome.GenomeLayer.getFolderSize;
-import org.neo4j.graphdb.Label;
-
-import static nl.wur.bif.pantools.pangenome.Classification.determine_frequency_list_int;
-import static nl.wur.bif.pantools.pangenome.Classification.genome_overview;
-import static nl.wur.bif.pantools.pangenome.Classification.get_annotation_identifiers;
-import static nl.wur.bif.pantools.pangenome.Classification.get_percentage_str;
-import static nl.wur.bif.pantools.pangenome.Classification.split_seq_in_parts_of_80bp;
-import static nl.wur.bif.pantools.pangenome.Classification.try_incr_AL_hashmap;
-import static nl.wur.bif.pantools.pangenome.Classification.try_incr_SB_hashmap;
-import static nl.wur.bif.pantools.pangenome.Classification.try_incr_hashmap;
 
+import static nl.wur.bif.pantools.pangenome.Classification.*;
+import static nl.wur.bif.pantools.pangenome.GenomeLayer.getFolderSize;
 import static nl.wur.bif.pantools.pangenome.create_skip_arrays.create_skip_arrays;
-
 import static nl.wur.bif.pantools.utils.Globals.*;
 import static nl.wur.bif.pantools.utils.Utils.*;
 
@@ -75,8 +44,8 @@ public class ProteomeLayer {
     private long[][] kmers_proteins_list;
     private int[] kmer_frequencies;
     private BlockingQueue<Node> proteins;
-    private BlockingQueue<nl.wur.bif.pantools.pangenome.ProteomeLayer.Intersection> intersections;
-    private BlockingQueue<nl.wur.bif.pantools.pangenome.ProteomeLayer.Intersection> similarities;
+    private BlockingQueue<Intersection> intersections;
+    private BlockingQueue<Intersection> similarities;
     private BlockingQueue<LinkedList> components; 
     private BlockingQueue<LinkedList> homology_groups_list; 
     private Node pangenome_node;
@@ -378,7 +347,7 @@ public class ProteomeLayer {
                                             crossing_protein = get_protein_sequence(crossing_protein_node);
                                             shorter_len = Math.min(protein_length, crossing_protein.length());
                                             if (counter >= frac * (shorter_len - PEPTIDE_SIZE + 1)) {
-                                                intersections.put(new nl.wur.bif.pantools.pangenome.ProteomeLayer.Intersection(protein_node, crossing_protein_node, 0));
+                                                intersections.put(new Intersection(protein_node, crossing_protein_node, 0));
                                                 ++num_ins;
                                             }
                                         }
@@ -393,7 +362,7 @@ public class ProteomeLayer {
                                     crossing_protein = get_protein_sequence(crossing_protein_node);
                                     shorter_len = Math.min(protein_length, crossing_protein.length());
                                     if (counter >= frac * (shorter_len - PEPTIDE_SIZE + 1)) {
-                                        intersections.put(new nl.wur.bif.pantools.pangenome.ProteomeLayer.Intersection(protein_node, crossing_protein_node,0));
+                                        intersections.put(new Intersection(protein_node, crossing_protein_node,0));
                                         ++num_ins;
                                     }
                                 }
@@ -408,7 +377,7 @@ public class ProteomeLayer {
                     System.out.print("0 ......................................... 100\n  "); 
                 // Signify the end of intersections queue.
                     for (i = 0; i < THREADS; ++i)
-                        intersections.put(new nl.wur.bif.pantools.pangenome.ProteomeLayer.Intersection(null, null,-1));// end of queue
+                        intersections.put(new Intersection(null, null,-1));// end of queue
                 } catch(InterruptedException e) {
                     System.err.println(e.getMessage());
                 }
@@ -454,7 +423,7 @@ public class ProteomeLayer {
         public void run() {
             Node protein_node1, protein_node2;
             String protein1, protein2;
-            nl.wur.bif.pantools.pangenome.ProteomeLayer.Intersection ints;
+            Intersection ints;
             int num_ints = 0;
             boolean all_intersections_found = false;
             int trsc = 0;
@@ -505,7 +474,7 @@ public class ProteomeLayer {
                 // Signify the end of the similarities queue.
                     //similarities.put(new intersection(null, null,0));
                     Node pangenome_node = GRAPH_DB.getNodeById(0);
-                    similarities.put(new nl.wur.bif.pantools.pangenome.ProteomeLayer.Intersection(pangenome_node, null, 0));
+                    similarities.put(new Intersection(pangenome_node, null, 0));
                     tx.success();
                 } finally {
                     tx.close();
@@ -567,7 +536,7 @@ public class ProteomeLayer {
 
         @Override
         public void run() {
-            nl.wur.bif.pantools.pangenome.ProteomeLayer.Intersection ints;
+            Intersection ints;
             int finished_thread_counter = 0;
             try{
                 ints = similarities.take();
@@ -577,7 +546,7 @@ public class ProteomeLayer {
                             if (ints.protein2 == null) {
                                 finished_thread_counter ++;
                                 if (finished_thread_counter == THREADS-2) {
-                                    similarities.put(new nl.wur.bif.pantools.pangenome.ProteomeLayer.Intersection(null, null, 0));
+                                    similarities.put(new Intersection(null, null, 0));
                                 }
                                 ints = similarities.take();
                                 continue;
@@ -969,7 +938,7 @@ public class ProteomeLayer {
     /**
      * Constructs the proteome layer of the pan-genome from a set of proteins.
      */
-    public void initialize_panproteome() {
+    public void initialize_panproteome(Path databaseDirectory) throws IOException {
         check_if_program_exists_stdout("kmc -h", 100, "kmc"); // check if program is set to $PATH
         if (PATH_TO_THE_PROTEOMES_FILE == null) {
             System.out.println("\nNo protein file provided via --proteomes-file or -pf\n");
@@ -981,7 +950,7 @@ public class ProteomeLayer {
         int trsc, num_proteins = 0, genome;
         String[] fields;
         check_if_all_proteome_files_exist(); 
-        create_panproteome_database();
+        CreatePanproteomeDatabase(databaseDirectory);
         try(BufferedReader protein_paths = new BufferedReader(new FileReader(PATH_TO_THE_PROTEOMES_FILE))) { 
             for (genome = 1; (file_path = protein_paths.readLine()) != null; ++genome) {
                 String previous_ID_line = "";
@@ -1675,7 +1644,7 @@ public class ProteomeLayer {
         try (Transaction tx = GRAPH_DB.beginTx()) { // start database transaction
             pangenome_node = GRAPH_DB.findNodes(pangenome_label).next();
             check_if_panproteome(pangenome_node); // sets PROTEOME boolean that controls functions, retrieves K_size & total_genomes
-            report_number_of_threads(false, false); //do not print how many threads were given, only when more than 1 thread was selected
+            report_number_of_threads(); // prints how many threads were selected by user
             tx.success(); // transaction successful, commit changes
         } catch (NotFoundException nfe) {
             System.out.println("Unable to start the database\n");
@@ -1995,7 +1964,7 @@ public class ProteomeLayer {
     public void change_active_grouping() {
         System.out.println("\nChanging 'inactive_homology_groups' back to 'homology_groups'\n");
         delete_file_in_DB("log/active_grouping_version");
-        report_number_of_threads(false, false); //print how many threads were given, only 1 thread allowed
+        report_number_of_threads(); // prints how many threads were selected by user
         check_if_graphDb_is_available(); // starts up the graph database if needed
         if (target_genome != null && GROUPING_VERSION == null) {
             GROUPING_VERSION = target_genome; // --reference argument is changed to --version in manual
@@ -2133,7 +2102,7 @@ public class ProteomeLayer {
         check_if_graphDb_is_available(); // starts up the graph database if needed
         if (standalone) {
             System.out.println("\nMoving the homology nodes so 'group' can be run again\n");
-            report_number_of_threads(true, false); //print how many threads were given, only 1 thread allowed
+            report_number_of_threads(); // prints how many threads were selected by user
         }
         try (Transaction tx = GRAPH_DB.beginTx()) { // start database transaction
             pangenome_node = GRAPH_DB.findNodes(pangenome_label).next();
@@ -2830,7 +2799,7 @@ public class ProteomeLayer {
         System.out.println("\nClustering protein sequences\n");
         check_if_program_exists_stdout("mcl -h", 100, "mcl"); // check if program is set to $PATH
         check_if_graphDb_is_available(); // starts up the graph database if needed
-        report_number_of_threads(true, true); // prints how many threads were selected by user
+        report_number_of_threads(); // prints how many threads were selected by user
         delete_files_from_previous_group_run();
         
         create_directory_in_DB("group");
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/export/NodeUUIDAndLabel.java b/src/main/java/nl/wur/bif/pantools/pangenome/export/NodeUUIDAndLabel.java
new file mode 100644
index 0000000000000000000000000000000000000000..6fbdebef8bb7bc278d870a5a980f6599debbc98a
--- /dev/null
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/export/NodeUUIDAndLabel.java
@@ -0,0 +1,34 @@
+package nl.wur.bif.pantools.pangenome.export;
+
+import org.neo4j.graphdb.Label;
+
+/**
+ * Convenience class for storing a node's UUID and label. Both fields are packed into one class as an
+ * optimization: generating either field requires inspecting both a node's labels and its properties.
+ */
+public class NodeUUIDAndLabel {
+    private final String uuid;
+    private final Label label;
+
+    public NodeUUIDAndLabel(String uuid, Label label) {
+        this.uuid = uuid;
+        this.label = label;
+    }
+
+    public String getUuid() {
+        return uuid;
+    }
+
+    public Label getLabel() {
+        return label;
+    }
+
+    @Override
+    public String toString() {
+        return "NodeUUIDAndLabel{" +
+            "uuid='" + uuid + '\'' +
+            ", label=" + label +
+            '}';
+    }
+}
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/export/PangenomeExporter.java b/src/main/java/nl/wur/bif/pantools/pangenome/export/PangenomeExporter.java
new file mode 100644
index 0000000000000000000000000000000000000000..828db77f0c12e87e3ea03189b42edd72eae33452
--- /dev/null
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/export/PangenomeExporter.java
@@ -0,0 +1,392 @@
+package nl.wur.bif.pantools.pangenome.export;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Streams;
+import nl.wur.bif.pantools.pangenome.export.records.NodeProperty;
+import nl.wur.bif.pantools.pangenome.export.records.Record;
+import nl.wur.bif.pantools.pangenome.export.records.RelationshipProperty;
+import nl.wur.bif.pantools.pangenome.export.records.SequenceAnchor;
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVPrinter;
+import org.neo4j.graphdb.*;
+import org.neo4j.graphdb.factory.GraphDatabaseFactory;
+import org.neo4j.graphdb.factory.GraphDatabaseSettings;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.*;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+
+import static nl.wur.bif.pantools.utils.Globals.*;
+import static nl.wur.bif.pantools.utils.Utils.*;
+
+
+/**
+ * Main class and entry point for exporting a pangenome. Will export pangenome
+ * contents as three CSV files for comparison purposes.
+ *
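+ * A minimal usage sketch (file names are hypothetical; assumes the graph database behind GRAPH_DB is already open):
+ * <pre>{@code
+ * new PangenomeExporter(
+ *     Paths.get("node-properties.csv"),
+ *     Paths.get("relationship-properties.csv"),
+ *     Paths.get("sequence-anchors.csv"));
+ * }</pre>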
+ */
+public class PangenomeExporter {
+    // TODO: we're using names because RelTypes.x contains ordinal properties, and relationship types don't
+    private static final Set<String> VALID_RELATIONSHIP_TYPE_NAMES = new HashSet<>(Arrays.asList(
+        RelTypes.has.name(),
+        RelTypes.FF.name(),
+        RelTypes.FR.name(),
+        RelTypes.RF.name(),
+        RelTypes.RR.name()
+    ));
+
+    private static final Map<Label, Set<String>> NODE_PROPERTIES_TO_SKIP = ImmutableMap.<Label, Set<String>>builder()
+        .put(pangenome_label, ImmutableSet.of("date"))
+        .put(genome_label, ImmutableSet.of("date", "path"))
+        .put(sequence_label, ImmutableSet.of("anchor_sides", "anchor_nodes", "anchor_positions"))
+        .put(degenerate_label, ImmutableSet.of("sequence"))
+        .put(nucleotide_label, ImmutableSet.of("sequence"))
+        .build();
+
+
+    /**
+     * Create a new pangenome exporter, exporting a database to files of three types:
+     * <p>
+     * 1. Node properties;
+     * 2. Relationship properties;
+     * 3. Sequence anchors.
+     * </p>
+     * @param nodePropertiesOutputPath path to node properties output file.
+     * @param relationshipPropertiesOutputPath path to relationship properties output file.
+     * @param sequenceAnchorsOutputPath path to sequence anchors output file.
+     * @throws IOException in case of Neo4j or output file IO error.
+     */
+    public PangenomeExporter(
+        Path nodePropertiesOutputPath,
+        Path relationshipPropertiesOutputPath,
+        Path sequenceAnchorsOutputPath) throws IOException {
+
+        exportNodeProperties(nodePropertiesOutputPath);
+        exportRelationshipProperties(relationshipPropertiesOutputPath);
+        exportSequenceAnchors(sequenceAnchorsOutputPath);
+    }
+
+    private void exportRelationshipProperties(Path outputFile) throws IOException {
+        try (Transaction ignored = GRAPH_DB.beginTx();
+             ResourceIterator<Relationship> relationships = GRAPH_DB.getAllRelationships().iterator();
+             CSVPrinter printer = getCSVPrinter(outputFile)) {
+
+            final Stream<RelationshipProperty> anchors = relationships
+                .stream()
+                .flatMap(relationship -> getRelationshipPropertyRecords(GRAPH_DB, relationship));
+
+            printRecords(printer, anchors);
+        }
+    }
+
+    private void exportNodeProperties(Path outputFile) throws IOException {
+        try (Transaction ignored = GRAPH_DB.beginTx();
+             ResourceIterator<Node> nodes = GRAPH_DB.getAllNodes().iterator();
+             CSVPrinter printer = getCSVPrinter(outputFile)) {
+
+            final Stream<NodeProperty> properties = nodes
+                .stream()
+                .flatMap(this::getNodePropertyRecords);
+
+            printRecords(printer, properties);
+        }
+    }
+
+    /**
+     * Return stream of relationship property records. Might be empty if the relationship should not be included in the
+     * property dump, for example when the relationship is not of the proper type, or has start and end nodes that
+     * are not part of the pangenome as such (i.e. not a pangenome, genome, sequence or nucleotide/degenerate node).
+     * @param GRAPH_DB graph database service.
+     * @param r relationship to create property records for.
+     * @return stream of relationship property records. Might be empty.
+     */
+    private Stream<RelationshipProperty> getRelationshipPropertyRecords(GraphDatabaseService GRAPH_DB, Relationship r) {
+        // TODO: we're retrieving the start and end node labels twice, as well as the relationship's type
+        // TODO: refactor to method returning optional<relationship type, start node label, end node label>?
+
+        // Verify we should include the relationship by checking its type and its start and end node labels
+        if (!isValidRelationshipType(r.getType()))
+            return Stream.empty();
+
+        final Node startNode = GRAPH_DB.getNodeById(r.getStartNodeId());
+        final Optional<NodeUUIDAndLabel> startNodeUUIDAndLabel = getNodeUuidAndLabel(startNode);
+        if (!startNodeUUIDAndLabel.isPresent())
+            return Stream.empty();
+
+        final Node endNode = GRAPH_DB.getNodeById(r.getEndNodeId());
+        final Optional<NodeUUIDAndLabel> endNodeUUIDAndLabel = getNodeUuidAndLabel(endNode);
+        if (!endNodeUUIDAndLabel.isPresent())
+            return Stream.empty();
+
+        // Generate record for each property
+
+        final Map<String, Object> properties = r.getAllProperties();
+        if (properties.isEmpty()) {
+            // Some relationships do not have any properties, such as those going from pangenome to genome nodes,
+            // or genome to sequence nodes. In these cases, the property key and value are null.
+
+            // TODO: combine start and end node optionals instead of using get()
+
+            return Stream.of(
+                new RelationshipProperty(
+                    r.getType().name(),
+                    startNodeUUIDAndLabel.get().getLabel().name(),
+                    startNodeUUIDAndLabel.get().getUuid(),
+                    endNodeUUIDAndLabel.get().getLabel().name(),
+                    endNodeUUIDAndLabel.get().getUuid(),
+                    "",
+                    ""
+                )
+            );
+        }
+
+        // TODO: refactor duplicate code
+        return properties
+            .entrySet()
+            .stream()
+            .map(entity -> new RelationshipProperty(
+                r.getType().name(),
+                startNodeUUIDAndLabel.get().getLabel().name(),
+                startNodeUUIDAndLabel.get().getUuid(),
+                endNodeUUIDAndLabel.get().getLabel().name(),
+                endNodeUUIDAndLabel.get().getUuid(),
+                entity.getKey(),
+                formatPropertyValue(entity.getValue())
+            ));
+    }
+
+    /**
+     * Return a stream of node property records for writing. For each node, we first retrieve
+     * the node's UUID and label, as an optional. If the optional is empty we return an empty stream. This will happen
+     * when the node should not be included in the node properties dump, as determined by its label set. If the optional
+     * is non-empty, we iterate over the properties of the node, excluding those that are not relevant, and generate
+     * a record for each.
+     * @param node node to generate property records for.
+     * @return stream with property records, or empty stream if node should not be included in the dump.
+     */
+    private Stream<NodeProperty> getNodePropertyRecords(Node node) {
+        return getNodeUuidAndLabel(node)
+            .map(uuidAndLabel -> skipNodeProperties(node.getAllProperties(), uuidAndLabel.getLabel())
+                .entrySet()
+                .stream()
+                .map(entry -> new NodeProperty(
+                    uuidAndLabel.getUuid(),
+                    uuidAndLabel.getLabel().name(),
+                    entry.getKey(),
+                    formatPropertyValue(entry.getValue())
+                ))
+            )
+            .orElse(Stream.empty());
+    }
+
+    /**
+     * Return a property map, skipping node properties that are not relevant or are used for generating UUIDs or labels.
+     *
+     * @param properties property map from node.
+     * @param label most significant node label.
+     * @return map excluding node properties.
+     */
+    private Map<String, Object> skipNodeProperties(Map<String, Object> properties, Label label) {
+        final Set<String> propertiesToSkip = NODE_PROPERTIES_TO_SKIP.get(label);
+        if (propertiesToSkip == null)
+            throw new IllegalArgumentException("cannot find node properties to skip for label " + label);
+
+        return properties
+            .entrySet()
+            .stream()
+            .filter(entry -> !propertiesToSkip.contains(entry.getKey()))
+            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+    }
+
+
+
+    /**
+     * Indicates whether the relationship's type is valid, meaning whether it is eligible for inclusion in the
+     * relationship property dump. Other checks are those on the node labels in {@link #getRelationshipPropertyRecords}.
+     * @param type the type of relationship.
+     * @return true if the relationship might be included in the relationship property dump, false if not.
+     * // TODO: different name would be clearer, but this is not the only criterion, see getRelationshipPropertyRecords
+     */
+    private boolean isValidRelationshipType(RelationshipType type) {
+        return VALID_RELATIONSHIP_TYPE_NAMES.contains(type.name());
+    }
+
+    /**
+     * Return the node's UUID and label. The node's label is the most significant label from the node's label set.
+     * Only pangenome, genome, sequence, nucleotide and degenerate nodes are processed; other types of nodes are
+     * returned as an empty optional. The node's UUID depends on the label.
+     * This method expects to be run inside a transaction to retrieve node labels and properties.
+     * @param node node to return UUID and label for.
+     * @return empty optional if the node does not have a relevant label, non-empty optional otherwise.
+     * @throws IllegalArgumentException if the label set is empty.
+     */
+    private Optional<NodeUUIDAndLabel> getNodeUuidAndLabel(Node node) {
+        final Set<Label> labels = getNodeLabelsAsSet(node);
+        if (labels.isEmpty())
+            throw new IllegalArgumentException("no labels for node " + node);
+
+        if (labels.contains(pangenome_label))
+            return Optional.of(new NodeUUIDAndLabel(pangenome_label.name(), pangenome_label));
+
+        if (labels.contains(genome_label)) {
+            // Strip directories from path to genome files - only filenames need to match
+            final String filename = Paths.get(node.getProperty("path").toString()).getFileName().toString();
+            return Optional.of(new NodeUUIDAndLabel(filename, genome_label));
+        }
+
+        if (labels.contains(sequence_label))
+            return Optional.of(new NodeUUIDAndLabel(node.getProperty("identifier").toString(), sequence_label));
+
+        // TODO: checking for degenerate label should come before nucleotide label, but order should not be significant
+        if (labels.contains(degenerate_label))
+            return Optional.of(new NodeUUIDAndLabel(node.getProperty("sequence").toString(), degenerate_label));
+
+        if (labels.contains(nucleotide_label))
+            return Optional.of(new NodeUUIDAndLabel(node.getProperty("sequence").toString(), nucleotide_label));
+
+        return Optional.empty();
+    }
+
+    /**
+     * Convenience function to turn Neo4j's Label iterable into a set, which makes downstream functionality easier
+     * to read.
+     * This method expects to be run inside a transaction to retrieve node labels and properties.
+     * @param node node to collect labels for.
+     * @return set of labels.
+     */
+    private Set<Label> getNodeLabelsAsSet(Node node) {
+        return Streams.stream(node.getLabels()).collect(Collectors.toSet());
+    }
+
+    /**
+     * Format a property value as a string. Returns null if the object is null; converts integer and long arrays to
+     * strings, with elements of long arrays suffixed by an 'L' character (e.g. '1L,2L'); otherwise calls toString().
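+     * For example:
+     * <pre>{@code
+     * formatPropertyValue(new int[]{1, 2});    // "1,2"
+     * formatPropertyValue(new long[]{1L, 2L}); // "1L,2L"
+     * }</pre>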
+     * @param object value.
+     * @return string representation of property value.
+     */
+    private String formatPropertyValue(Object object) {
+        if (object == null)
+            return null;
+
+        if (object instanceof int[])
+            return Arrays
+                .stream(((int[]) object))
+                .mapToObj(Integer::toString)
+                .collect(Collectors.joining(","));
+        else if (object instanceof long[])
+            return Arrays
+                .stream(((long[]) object))
+                .mapToObj(l -> l + "L")
+                .collect(Collectors.joining(","));
+
+        return object.toString();
+    }
+
+    /**
+     * Export sequence nodes' anchors to a file.
+     */
+    private void exportSequenceAnchors(Path outputFile) throws IOException {
+        try (Transaction ignored = GRAPH_DB.beginTx();
+             ResourceIterator<Node> nodes = GRAPH_DB.findNodes(sequence_label);
+             CSVPrinter printer = getCSVPrinter(outputFile)) {
+
+            final Stream<SequenceAnchor> anchors = nodes
+                .stream()
+                .flatMap(node -> getSequenceAnchorRecords(GRAPH_DB, node));
+
+            printRecords(printer, anchors);
+        }
+    }
+
+    /**
+     * Print all records in a stream to a CSVPrinter.
+     * @param printer CSVPrinter to print to.
+     * @param records stream of records.
+     * @throws IOException in case of write error.
+     */
+    private void printRecords(CSVPrinter printer, Stream<? extends Record> records) throws IOException {
+        final Iterator<? extends Record> iterator = records.iterator();
+        while (iterator.hasNext()) {
+            final Record record = iterator.next();
+            printer.printRecord(record.asList());
+        }
+    }
+
+    /**
+     * Return a CSV printer writing to the provided path.
+     * @param path file to write to.
+     * @return CSV printer for the file.
+     * @throws IOException if the file cannot be opened for writing.
+     */
+    private CSVPrinter getCSVPrinter(Path path) throws IOException {
+        return new CSVPrinter(Files.newBufferedWriter(path), CSVFormat.DEFAULT);
+    }
+
+    /**
+     * Get sequence anchor records for a sequence node, an anchor being a combination of:
+     * <p>
+     * 1. The nucleotide node's sequence;
+     * 2. The base pair position;
+     * 3. The nucleotide node's side.
+     * </p>
+     * @param GRAPH_DB graph database service.
+     * @param node sequence node to generate records for.
+     * @return stream of sequence anchor records.
+     */
+    private Stream<SequenceAnchor> getSequenceAnchorRecords(GraphDatabaseService GRAPH_DB, Node node) {
+        final Map<String, Object> properties = node.getAllProperties();
+
+        // TODO: a stream implementation would be nice, but zipping three streams into one is messy and unreadable
+        // TODO: use NodeUUIDAndLabel for node's UUID, instead of sequence directly
+
+        final int[] positions = (int[]) properties.get("anchor_positions");
+        final long[] nodeIds = (long[]) properties.get("anchor_nodes");
+        final String sides = (String) properties.get("anchor_sides");
+
+        if ((positions.length != nodeIds.length) || (nodeIds.length != sides.length()))
+            throw new IllegalArgumentException("positions, nodes and sides not of equal length for node " + node);
+
+        final String identifier = (String) properties.get("identifier");
+
+        return IntStream
+            .range(0, positions.length)
+            .mapToObj(i -> new SequenceAnchor(
+                identifier,
+                i,
+                positions[i],
+                getNucleotideNodeSequence(GRAPH_DB, nodeIds[i]),
+                sides.substring(i, i + 1)
+            ));
+    }
+
+    /**
+     * Return a nucleotide node's sequence. Does not check whether the
+     * referenced node is actually a nucleotide node.
+     * @param GRAPH_DB graph database service.
+     * @param nodeId ID of the nucleotide node.
+     * @return the node's sequence.
+     */
+    private String getNucleotideNodeSequence(GraphDatabaseService GRAPH_DB, long nodeId) {
+        return (String) GRAPH_DB.getNodeById(nodeId).getProperty("sequence");
+    }
+
+
+    /**
+     * Return a new graph database service for the Neo4j database at the
+     * provided path. A shutdown hook is configured to make sure the database
+     * is always shut down correctly.
+     * @param neo4jDatabasePath path to Neo4j database.
+     * @return graph database service.
+     */
+    private GraphDatabaseService createGraphDatabaseService(Path neo4jDatabasePath) {
+        final GraphDatabaseService GRAPH_DB = new GraphDatabaseFactory()
+            .newEmbeddedDatabaseBuilder(neo4jDatabasePath.toFile())
+            .setConfig(GraphDatabaseSettings.keep_logical_logs, "4 files")
+            .newGraphDatabase();
+
+        registerShutdownHook(GRAPH_DB);
+        return GRAPH_DB;
+    }
+}
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/export/records/NodeProperty.java b/src/main/java/nl/wur/bif/pantools/pangenome/export/records/NodeProperty.java
new file mode 100644
index 0000000000000000000000000000000000000000..886642b2e6a2be8b1828a449328ebf7f59edd442
--- /dev/null
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/export/records/NodeProperty.java
@@ -0,0 +1,25 @@
+package nl.wur.bif.pantools.pangenome.export.records;
+
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * Stores a Neo4j node property: a UUID, node label, and the node's property
+ * key and value.
+ */
+public class NodeProperty implements Record {
+    private final String uuid, label, key, value;
+
+    public NodeProperty(String uuid, String label, String key, String value) {
+        this.uuid = uuid;
+        this.label = label;
+        this.key = key;
+        this.value = value;
+    }
+
+    @Override
+    public List<String> asList() {
+        return ImmutableList.of(uuid, label, key, value);
+    }
+}
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/export/records/Record.java b/src/main/java/nl/wur/bif/pantools/pangenome/export/records/Record.java
new file mode 100644
index 0000000000000000000000000000000000000000..a5cb38dddc64bd15e285dd2e89cd66f707312266
--- /dev/null
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/export/records/Record.java
@@ -0,0 +1,10 @@
+package nl.wur.bif.pantools.pangenome.export.records;
+
+import java.util.List;
+
+/**
+ * Record interface for objects that can be dumped to CSV as a list of string fields.
+ */
+public interface Record {
+    List<String> asList();
+}
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/export/records/RelationshipProperty.java b/src/main/java/nl/wur/bif/pantools/pangenome/export/records/RelationshipProperty.java
new file mode 100644
index 0000000000000000000000000000000000000000..cae647f5a252876307052743850ba5d670a22e5e
--- /dev/null
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/export/records/RelationshipProperty.java
@@ -0,0 +1,32 @@
+package nl.wur.bif.pantools.pangenome.export.records;
+
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * Stores type of relation, label of start and end node, as well as their
+ * UUIDs, and the key and value of the property.
+ */
+public class RelationshipProperty implements Record {
+    private final String type;
+    private final String startNodeLabel, startNodeUuid;
+    private final String endNodeLabel, endNodeUuid;
+    private final String key, value;
+
+    public RelationshipProperty(String type, String startNodeLabel, String startNodeUuid, String endNodeLabel, String endNodeUuid, String key, String value) {
+        this.type = type;
+        this.startNodeLabel = startNodeLabel;
+        this.startNodeUuid = startNodeUuid;
+        this.endNodeLabel = endNodeLabel;
+        this.endNodeUuid = endNodeUuid;
+        this.key = key;
+        this.value = value;
+    }
+
+    @Override
+    public List<String> asList() {
+        return ImmutableList.of(type, startNodeLabel, startNodeUuid, endNodeLabel, endNodeUuid, key, value);
+    }
+}
+
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/export/records/SequenceAnchor.java b/src/main/java/nl/wur/bif/pantools/pangenome/export/records/SequenceAnchor.java
new file mode 100644
index 0000000000000000000000000000000000000000..14af01c5d26b9cc4470ec55a290216710fe7e0d1
--- /dev/null
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/export/records/SequenceAnchor.java
@@ -0,0 +1,37 @@
+package nl.wur.bif.pantools.pangenome.export.records;
+
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * Record class for implementing a sequence anchor. Sequence anchors are stored
+ * in sequence nodes, as three arrays: base pair positions, nucleotide node IDs,
+ * and nucleotide node sides. This class packs the elements at position i of
+ * these three arrays into a single record.
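+ * A minimal example (the identifier and values are hypothetical):
+ * <pre>{@code
+ * // Anchor 0 of sequence "1_1": base pair position 0, node sequence "ACGT", side "F"
+ * new SequenceAnchor("1_1", 0, 0, "ACGT", "F").asList(); // ["1_1", "0", "0", "ACGT", "F"]
+ * }</pre>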
+ */
+public class SequenceAnchor implements Record {
+    private final String identifier;
+    private final int index, position;
+    private final String nodeSequence;
+    private final String side;
+
+    public SequenceAnchor(String identifier, int index, int position, String nodeSequence, String side) {
+        this.identifier = identifier;
+        this.index = index;
+        this.position = position;
+        this.nodeSequence = nodeSequence;
+        this.side = side;
+    }
+
+    @Override
+    public List<String> asList() {
+        return ImmutableList.of(
+            identifier,
+            Integer.toString(index),
+            Integer.toString(position),
+            nodeSequence,
+            side
+        );
+    }
+}
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/parallel/Anchors.java b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/Anchors.java
new file mode 100644
index 0000000000000000000000000000000000000000..a80bd8ba8775e5c329c3d8a2e1c0d22b84627bdc
--- /dev/null
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/Anchors.java
@@ -0,0 +1,51 @@
+package nl.wur.bif.pantools.pangenome.parallel;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Stores anchor nodes, positions and sides during localization. This is a convenience class that aggregates these
+ * three elements, rather than keeping a separate array for each as in the previous implementation.
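+ * A minimal usage sketch (node ID, position and side are illustrative):
+ * <pre>{@code
+ * Anchors anchors = new Anchors();
+ * anchors.add(42L, 1000, 'F');
+ * long[] nodeIds = anchors.getNodeIds(); // [42]
+ * String sides = anchors.getSides();     // "F"
+ * }</pre>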
+ */
+public class Anchors {
+    private final List<Anchor> anchors;
+
+    private static class Anchor {
+        public final long nodeId;
+        public final int position;
+        public final char side;
+
+        public Anchor(long nodeId, int position, char side) {
+            this.nodeId = nodeId;
+            this.position = position;
+            this.side = side;
+        }
+    }
+
+    public Anchors() {
+        // TODO: pre-allocate
+        anchors = new ArrayList<>();
+    }
+
+    public void add(long nodeId, int position, char side) {
+        anchors.add(new Anchor(nodeId, position, side));
+    }
+
+    public long[] getNodeIds() {
+        return anchors.stream().mapToLong(anchor -> anchor.nodeId).toArray();
+    }
+
+    public int[] getPositions() {
+        return anchors.stream().mapToInt(anchor -> anchor.position).toArray();
+    }
+
+    public int size() {
+        return anchors.size();
+    }
+
+    public String getSides() {
+        final StringBuilder builder = new StringBuilder(anchors.size());
+        anchors.forEach(anchor -> builder.append(anchor.side));
+        return builder.toString();
+    }
+}
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/parallel/Buckets.java b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/Buckets.java
new file mode 100644
index 0000000000000000000000000000000000000000..7c6f889d5fe32f9b35b1a2ec77e3e42982b8bf7b
--- /dev/null
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/Buckets.java
@@ -0,0 +1,62 @@
+package nl.wur.bif.pantools.pangenome.parallel;
+
+import com.esotericsoftware.kryo.kryo5.Kryo;
+import com.esotericsoftware.kryo.kryo5.io.Output;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Function;
+
+import static nl.wur.bif.pantools.pangenome.parallel.KryoUtils.getKryo;
+
+/**
+ * Convenience class for opening and automatically closing (through {@link AutoCloseable}) Kryo output files (buckets).
+ * Each localization object is hashed to a key, and written to the bucket with index equal to the hash key % #buckets.
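+ * A minimal usage sketch (the bucket count and hasher are illustrative assumptions):
+ * <pre>{@code
+ * try (Buckets buckets = new Buckets(outputDirectory, 16)) {
+ *     buckets.write(localization, Localization::getRelationshipId);
+ * }
+ * }</pre>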
+ */
+public class Buckets implements AutoCloseable {
+    private final List<Path> paths;
+    private final List<Output> outputs;
+    private final int numBuckets;
+    private final Kryo kryo;
+    private long numLocalizations;
+
+    public Buckets(Path outputDirectory, int numBuckets) throws IOException {
+        paths = new ArrayList<>(numBuckets);
+        outputs = new ArrayList<>(numBuckets);
+        this.numBuckets = numBuckets;
+        // TODO: pass in Kryo object?
+        kryo = getKryo();
+        numLocalizations = 0;
+
+        for (int i = 0; i < numBuckets; i++) {
+            final Path path = outputDirectory.resolve(String.format("bucket-%05d.kryo", i));
+            // TODO: make compression format and level configurable
+            paths.add(path);
+            outputs.add(KryoUtils.createOutput(path, 1));
+        }
+    }
+
+    public void write(Localization localization, Function<Localization, Long> hasher) {
+        final long key = hasher.apply(localization);
+        final int bucketIndex = (int) (key % (long) numBuckets);
+        kryo.writeObject(outputs.get(bucketIndex), localization);
+        numLocalizations++;
+    }
+
+    public long getNumLocalizations() {
+        return numLocalizations;
+    }
+
+    public List<Path> getPaths() {
+        return paths;
+    }
+
+    @Override
+    public void close() throws Exception {
+        for (Output output : outputs) {
+            output.close();
+        }
+    }
+}
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/parallel/KryoUtils.java b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/KryoUtils.java
new file mode 100644
index 0000000000000000000000000000000000000000..99ed0749ed8601e44692826d3d02b1a62775eb93
--- /dev/null
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/KryoUtils.java
@@ -0,0 +1,40 @@
+package nl.wur.bif.pantools.pangenome.parallel;
+
+import com.esotericsoftware.kryo.kryo5.Kryo;
+import com.esotericsoftware.kryo.kryo5.io.Input;
+import com.esotericsoftware.kryo.kryo5.io.Output;
+import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
+import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+/**
+ * Utility functions for creating Kryo inputs and outputs. By default will create outputs with XZ compression.
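+ * A minimal round-trip sketch (the path and object are illustrative):
+ * <pre>{@code
+ * Kryo kryo = getKryo();
+ * try (Output output = createOutput(path)) {
+ *     kryo.writeObject(output, localization);
+ * }
+ * try (Input input = createInput(path)) {
+ *     Localization read = kryo.readObject(input, Localization.class);
+ * }
+ * }</pre>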
+ */
+public class KryoUtils {
+    private static final int DEFAULT_XZ_PRESET = 6;
+
+    public static Kryo getKryo() {
+        final Kryo kryo = new Kryo();
+        kryo.register(Localization.class);
+        return kryo;
+    }
+
+    public static Output createOutput(Path path) throws IOException {
+        return createOutput(path, DEFAULT_XZ_PRESET);
+    }
+
+    public static Output createOutput(Path path, int preset) throws IOException {
+        // Default buffer size for Kryo is 4kb, see:
+        // https://github.com/EsotericSoftware/kryo/blob/e7170a1da8de84dfc5214a7d20029e33d10b3e8a/src/com/esotericsoftware/kryo/io/Output.java#L81
+        return new Output(new XZCompressorOutputStream(Files.newOutputStream(path), preset));
+    }
+
+    public static Input createInput(Path path) throws IOException {
+        return new Input(new XZCompressorInputStream(Files.newInputStream(path)));
+    }
+}
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/parallel/Localization.java b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/Localization.java
new file mode 100644
index 0000000000000000000000000000000000000000..7e7126b2dae31df3c9082091617686684b76f75c
--- /dev/null
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/Localization.java
@@ -0,0 +1,83 @@
+package nl.wur.bif.pantools.pangenome.parallel;
+
+import java.io.Serializable;
+import java.util.Objects;
+
+/**
+ * This class stores updates to the localization information stored in the pangenome graph. It is also used to calculate
+ * the frequency with which each nucleotide occurs in each genome. Updates are written out during the localization
+ * process, sorted and aggregated, and subsequently written out to the pangenome graph.
+ * <br/>
+ * For sequence 'sequenceIndex' of genome 'genomeIndex', at offset 'offset', the sequence moves from the nucleotide node
+ * at the source of a relationship (of type FF, FR, RF or RR) with ID 'relationshipId' to the nucleotide node with ID
+ * 'endNodeId'.
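+ * <br/>
+ * For example (values are hypothetical), sequence 1 of genome 2 passing, at base pair offset 100, through the
+ * relationship with ID 42 to the end node with ID 7 would be recorded as:
+ * <pre>{@code
+ * Localization l = new Localization(42, 2, 1, 100, 7);
+ * }</pre>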
+ */
+class Localization implements Serializable {
+    private final long relationshipId;
+    private final int genomeIndex;
+    private final int sequenceIndex;
+    private final int offset;
+    private final long endNodeId;
+
+    public Localization(long relationshipId, int genomeIndex, int sequenceIndex, int offset, long endNodeId) {
+        this.relationshipId = relationshipId;
+        this.genomeIndex = genomeIndex;
+        this.sequenceIndex = sequenceIndex;
+        this.offset = offset;
+        this.endNodeId = endNodeId;
+    }
+
+    // No-args constructor needed for Kryo
+    @SuppressWarnings("unused")
+    public Localization() {
+        relationshipId = -1;
+        genomeIndex = -1;
+        sequenceIndex = -1;
+        offset = -1;
+        endNodeId = -1;
+    }
+
+    public long getRelationshipId() {
+        return relationshipId;
+    }
+
+    public int getGenomeIndex() {
+        return genomeIndex;
+    }
+
+    public int getSequenceIndex() {
+        return sequenceIndex;
+    }
+
+    public int getOffset() {
+        return offset;
+    }
+
+    public long getEndNodeId() {
+        return endNodeId;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        Localization that = (Localization) o;
+        return relationshipId == that.relationshipId && genomeIndex == that.genomeIndex && sequenceIndex == that.sequenceIndex && offset == that.offset && endNodeId == that.endNodeId;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(relationshipId, genomeIndex, sequenceIndex, offset, endNodeId);
+    }
+
+    @Override
+    public String toString() {
+        return "LocalizationUpdate{" +
+            "relationshipId=" + relationshipId +
+            ", genomeIndex=" + genomeIndex +
+            ", sequenceIndex=" + sequenceIndex +
+            ", offset=" + offset +
+            ", endNodeId=" + endNodeId +
+            '}';
+    }
+}
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/parallel/LocalizationsWriterTask.java b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/LocalizationsWriterTask.java
new file mode 100644
index 0000000000000000000000000000000000000000..8b5b291c39502078572d119f4281ebc20b76516a
--- /dev/null
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/LocalizationsWriterTask.java
@@ -0,0 +1,102 @@
+package nl.wur.bif.pantools.pangenome.parallel;
+
+import com.esotericsoftware.kryo.kryo5.Kryo;
+import com.esotericsoftware.kryo.kryo5.io.Input;
+import org.apache.commons.lang3.tuple.Triple;
+import org.apache.logging.log4j.Logger;
+import org.neo4j.graphdb.Relationship;
+import org.neo4j.graphdb.Transaction;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Callable;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.stream.Collectors;
+
+import static nl.wur.bif.pantools.pangenome.parallel.KryoUtils.getKryo;
+import static nl.wur.bif.pantools.utils.Globals.GRAPH_DB;
+
+/**
+ * Task for writing out localization information to relationships in the form of zero or more properties that store the
+ * base pair offsets for a sequence. This task reads a list of {@link Localization} items from a file, groups them by
+ * relationship ID, genome index and sequence index and aggregates the sorted base pair offsets before writing them out
+ * to a relationship property.
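+ * A minimal usage sketch (the bucket path and batch size are illustrative):
+ * <pre>{@code
+ * // Offsets for genome 2, sequence 1 of a relationship end up in its property "G2S1" as a sorted int[].
+ * long propertiesWritten = new LocalizationsWriterTask(bucketPath, log, 10_000).call();
+ * }</pre>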
+ */
+public class LocalizationsWriterTask implements Callable<Long> {
+    private final Path path;
+    private final Logger log;
+    private final long maximumBatchSize;
+
+    public LocalizationsWriterTask(Path path, Logger log, long maximumTransactionSize) {
+        this.path = path;
+        this.log = log;
+        this.maximumBatchSize = maximumTransactionSize;
+    }
+
+    @Override
+    public Long call() throws Exception {
+        //noinspection resource
+        final Transaction[] tx = {GRAPH_DB.beginTx()};
+
+        AtomicLong counter = new AtomicLong(0);
+        try {
+            final List<Localization> localizations = loadLocalizations(path, getKryo());
+            log.debug(String.format("Loaded %,d localizations from file %s", localizations.size(), path));
+
+            if (localizations.isEmpty())
+                return 0L;
+
+            // TODO: optimize memory by looping
+            // TODO: substitute triple with separate type
+            final Map<Triple<Long, Integer, Integer>, List<Localization>> grouped = localizations
+                .stream()
+                .collect(Collectors.groupingBy(l -> Triple.of(l.getRelationshipId(), l.getGenomeIndex(), l.getSequenceIndex())));
+
+            grouped.forEach((key, ls) -> {
+                // TODO: how memory-efficient is this?
+                final int[] offsets = ls.stream().mapToInt(Localization::getOffset).toArray();
+                Arrays.sort(offsets);
+
+                final Relationship relationship = GRAPH_DB.getRelationshipById(key.getLeft());
+                // TODO: genome-sequence identifier to custom type
+                final String propertyName = "G" + key.getMiddle() + "S" + key.getRight();
+                relationship.setProperty(propertyName, offsets);
+
+                counter.getAndIncrement();
+                if (counter.get() % maximumBatchSize == 0) {
+                    log.trace(String.format("Committing transaction of size %,d", maximumBatchSize));
+                    tx[0].success();
+                    tx[0].close();
+                    //noinspection resource
+                    tx[0] = GRAPH_DB.beginTx();
+                }
+            });
+        } catch (IOException e) {
+            throw new RuntimeException("error creating Kryo input for path " + path + ": " + e);
+        } finally {
+            // TODO: debug message for last transaction
+            tx[0].success();
+            tx[0].close();
+        }
+        log.debug(String.format("Written %,d properties for bucket %s", counter.get(), path));
+
+        // TODO: return more involved metrics
+        return counter.get();
+    }
+
+    // TODO: remove duplicate code
+    public List<Localization> loadLocalizations(Path path, Kryo kryo) throws IOException {
+        final List<Localization> updates = new ArrayList<>();
+
+        try (final Input input = KryoUtils.createInput(path)) {
+            while (!input.end())
+                updates.add(kryo.readObject(input, Localization.class));
+        }
+
+        return updates;
+    }
+}
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/parallel/LocalizeNodesParallel.java b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/LocalizeNodesParallel.java
new file mode 100644
index 0000000000000000000000000000000000000000..6771419689cfa1d69fb4087c6662f5bbe8c2015f
--- /dev/null
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/LocalizeNodesParallel.java
@@ -0,0 +1,473 @@
+package nl.wur.bif.pantools.pangenome.parallel;
+
+import com.esotericsoftware.kryo.kryo5.Kryo;
+import com.esotericsoftware.kryo.kryo5.io.Input;
+import com.github.benmanes.caffeine.cache.Cache;
+import com.github.benmanes.caffeine.cache.Caffeine;
+import nl.wur.bif.pantools.pantools.Pantools;
+import org.apache.commons.io.FileUtils;
+import org.apache.logging.log4j.Logger;
+import org.neo4j.graphdb.Node;
+import org.neo4j.graphdb.ResourceIterator;
+import org.neo4j.graphdb.Transaction;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.*;
+import java.util.concurrent.*;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.Consumer;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import static nl.wur.bif.pantools.pangenome.parallel.KryoUtils.getKryo;
+import static nl.wur.bif.pantools.utils.Globals.*;
+
+/**
+ * Primary class for running localization of the pangenome. The localization step is subdivided into five stages:
+ * <p>
+ * 1. Traversing the pangenome graph and writing updates relevant to localization to one or more files;
+ * 2. Sorting the localization files from stage 1 into a number of buckets (files) by relationship to derive
+ *    localization information;
+ * 3. Writing out the contents of the buckets from stage 2 to the relationships in the pangenome graph;
+ * 4. Sorting the localization files from stage 1 into a number of buckets (files) by node to derive node-genome
+ *    frequencies;
+ * 5. Writing out the contents of the buckets from stage 4 to the nodes in the pangenome graph.
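+ * <p>
+ * A minimal usage sketch (the constructor arguments shown are illustrative values, not recommendations):
+ * <pre>{@code
+ * LocalizeNodesParallel lnp = new LocalizeNodesParallel(
+ *     scratchDir, // existing scratch directory
+ *     64,         // number of buckets
+ *     100_000,    // transaction size
+ *     4,          // number of database writer threads
+ *     1_000_000,  // node properties cache size
+ *     false);     // keep intermediate files
+ * long highestFrequency = lnp.run(scratchDir);
+ * }</pre>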
+ */
+public class LocalizeNodesParallel {
+    public static final int DEFAULT_EXECUTOR_SERVICE_SHUTDOWN_TIMEOUT_SECONDS = 10;
+
+    private final Path scratchDirectory;
+    private final int numBuckets;
+    private final int transactionSize;
+    private final int numDbWriterThreads;
+    private final int nodePropertiesCacheSize;
+    private final boolean keepIntermediateFiles;
+
+    // TODO: map environment variables and defaults to configuration object
+    public LocalizeNodesParallel(Path scratchDirectory, int numBuckets, int transactionSize,
+                                 int numDbWriterThreads, int nodePropertiesCacheSize, boolean keepIntermediateFiles) {
+        this.scratchDirectory = scratchDirectory;
+        this.numBuckets = numBuckets;
+        this.transactionSize = transactionSize;
+        this.numDbWriterThreads = numDbWriterThreads;
+        this.nodePropertiesCacheSize = nodePropertiesCacheSize;
+        this.keepIntermediateFiles = keepIntermediateFiles;
+    }
+
+    /**
+     * Create output directory for storing the localization output generated in stage 1.
+     *
+     * @return path to output directory for localization output.
+     * @throws IOException if directory cannot be created.
+     */
+    public Path createLocalizationOutputDirectory() throws IOException {
+        final Path path = scratchDirectory.resolve("localizations");
+        Files.createDirectory(path);
+        return path;
+    }
+
+    /**
+     * Create output directory for storing the output of the sorted data in stage 2.
+     *
+     * @return path to output directory for stage 2 output.
+     * @throws IOException if directory cannot be created.
+     */
+    public Path createLocalizationBucketsOutputDirectory() throws IOException {
+        final Path path = scratchDirectory.resolve("offset-buckets");
+        Files.createDirectory(path);
+        return path;
+    }
+
+    /**
+     * Main method for localizing the pangenome. A scratch directory (which must exist) needs to be provided to store
+     * temporary data in. Intermediate files are removed afterwards, unless retention is requested through the CLI.
+     *
+     * @param scratchDirectory path to temporary ('scratch') directory to store intermediate files in.
+     * @return highest genome frequency across all nucleotide nodes.
+     */
+    public long run(Path scratchDirectory) throws Exception {
+        final long start = System.currentTimeMillis();
+        final Logger log = Pantools.logger;
+
+        log.info("Scratch directory: " + scratchDirectory);
+
+        // TODO: pre-flight check for creating #buckets?
+        // TODO: combine stage 1 and 2 by directly sorting into offset buckets?
+        // TODO: stages 2 and 4 can be run in parallel, given a sufficient number of open files
+        // Stage 1: localize and write out updates to a file per sequence
+
+        log.info("Stage 1: localizing sequence nodes");
+        long stageStart = System.currentTimeMillis();
+        final Path localizationOutputDirectory = createLocalizationOutputDirectory();
+        final List<Path> localizationFiles = localizeNodes(localizationOutputDirectory, log);
+        log.info(String.format("Finished stage 1 in %,d ms.", System.currentTimeMillis() - stageStart));
+
+        // TODO: display number of localizations
+        // TODO: create timing method
+        // TODO: depending on stage we can ditch some of the localization information to save space and processing time
+
+        // Stage 2: sort localizations into buckets hashed by relationship ID
+
+        stageStart = System.currentTimeMillis();
+        log.info("Stage 2: sorting localizations into buckets");
+        final Path localizationBucketsDirectory = createLocalizationBucketsOutputDirectory();
+        final List<Path> localizationBuckets = sortIntoLocalizationOffsetsBuckets(localizationFiles, localizationBucketsDirectory, numBuckets, log);
+        log.info(String.format("Finished stage 2 in %,d ms.", System.currentTimeMillis() - stageStart));
+
+        // Stage 3: read localization updates from stage 2, grouping into base pair offsets and writing them out
+
+        stageStart = System.currentTimeMillis();
+        log.info("Stage 3: writing localizations to the database");
+        writeLocalizationOffsetsToDatabase(localizationBuckets, numDbWriterThreads, log);
+        log.info(String.format("Finished stage 3 in %,d ms.", System.currentTimeMillis() - stageStart));
+
+        // Stage 4: sort localizations into buckets hashed by end node ID
+
+        stageStart = System.currentTimeMillis();
+        log.info("Stage 4: sorting nodes into buckets");
+        final Path nodeBucketsDirectory = createNodeBucketsOutputDirectory(scratchDirectory);
+        final List<Path> nodeBuckets = sortIntoNodeBuckets(localizationFiles, nodeBucketsDirectory, numBuckets, log);
+        log.info(String.format("Finished stage 4 in %,d ms.", System.currentTimeMillis() - stageStart));
+
+        // Stage 5: write out node frequencies
+
+        stageStart = System.currentTimeMillis();
+        log.info("Stage 5: writing genome frequencies to nodes");
+        final long highestFrequency = writeGenomeFrequenciesToDatabase(nodeBuckets, numDbWriterThreads, log);
+        log.info(String.format("Finished stage 5 in %,d ms.", System.currentTimeMillis() - stageStart));
+        log.info(String.format("Highest genome frequency for node: %,d", highestFrequency));
+
+        // Clean up
+
+        if (!keepIntermediateFiles) {
+            final List<Path> filesToCleanUp = new ArrayList<>(localizationFiles);
+            filesToCleanUp.addAll(localizationBuckets);
+            filesToCleanUp.addAll(nodeBuckets);
+            cleanup(filesToCleanUp, log);
+        }
+
+        log.info(String.format("Finished localization phase in %,d ms.", System.currentTimeMillis() - start));
+
+        return highestFrequency;
+    }
+
+    /**
+     * Create output directory for storing the output of the sorted data in stage 4.
+     *
+     * @param scratchDirectory scratch directory to create output directory in.
+     * @return path to output directory for stage 4 output.
+     * @throws IOException if directory cannot be created.
+     */
+    public Path createNodeBucketsOutputDirectory(Path scratchDirectory) throws IOException {
+        final Path path = scratchDirectory.resolve("node-buckets");
+        Files.createDirectory(path);
+        return path;
+    }
+
+    /**
+     * Return the node properties cache, which evicts approximately least-frequently used (LFU) entries. Its size is
+     * determined by an environment variable or the default. Maps a nucleotide node ID to a {@link NodeProperties}
+     * object storing the node's address (genome index, sequence index, base pair offset) and its sequence length.
+     *
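+     * <p>
+     * The cache is typically consulted with a loader so that a miss computes and stores the value; a sketch, with
+     * {@code address} and {@code length} as placeholders:
+     * <pre>{@code
+     * NodeProperties props = cache.get(nodeId, id -> new NodeProperties(address, length));
+     * }</pre>
+     *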
+     * @return Caffeine LFU cache mapping nucleotide node IDs to node properties.
+     */
+    public Cache<Long, NodeProperties> getNodePropertiesCache() {
+        return Caffeine.newBuilder()
+            .maximumSize(nodePropertiesCacheSize)
+            .recordStats()
+            .build();
+    }
+
+    /**
+     * Entry point for stage 1: localizing the nucleotide nodes and writing localization information to a file per
+     * sequence.
+     *
+     * @param outputDirectory directory to write output files to, must exist.
+     * @param log             logger.
+     * @return list of paths, each pointing to the localization file for a sequence.
+     */
+    private List<Path> localizeNodes(Path outputDirectory, Logger log) throws Exception {
+        final int numThreads = THREADS;
+        final Cache<Long, NodeProperties> cache = getNodePropertiesCache();
+        log.info("Using node properties cache of size " + nodePropertiesCacheSize);
+
+        final List<Node> sequenceNodes = getSequenceNodes();
+
+        log.info(String.format("Localizing %,d sequence nodes with %d threads", sequenceNodes.size(), numThreads));
+        log.info("Output directory: " + outputDirectory);
+
+        final List<NodeLocalizationTask> tasks = sequenceNodes
+            .stream()
+            .map(sequenceNode -> new NodeLocalizationTask(sequenceNode, outputDirectory, cache, log))
+            .collect(Collectors.toList());
+
+        // TODO: show cache metrics
+        return runParallel(tasks, numThreads, log);
+    }
+
+    /**
+     * Return all sequence nodes in the pangenome graph as a list.
+     *
+     * @return list of all sequence nodes in pangenome graph.
+     */
+    private List<Node> getSequenceNodes() {
+        try (Transaction ignored = GRAPH_DB.beginTx();
+             ResourceIterator<Node> itr = GRAPH_DB.findNodes(sequence_label)) {
+            return itr
+                .stream()
+                .collect(Collectors.toList());
+        }
+    }
+
+    /**
+     * Sort localization files into buckets according to a specified hashing function that maps a
+     * {@link nl.wur.bif.pantools.pangenome.parallel.Localization} object to a long that is used as a hashing key.
+     * Note: the key returned by the hasher can be any value; it is taken modulo the number of buckets to calculate
+     * the bucket index.
+     *
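+     * <p>
+     * A minimal sketch of the bucketing rule, assuming bucket indices are derived by a simple modulo (the actual
+     * {@code Buckets} implementation may differ):
+     * <pre>{@code
+     * long key = hasher.apply(localization);
+     * int bucketIndex = (int) Math.floorMod(key, (long) numBuckets);
+     * }</pre>
+     *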
+     * @param localizationFiles list of localization files.
+     * @param outputDirectory   output directory to store buckets in.
+     * @param hasher            hashing function.
+     * @param numBuckets        number of buckets (files).
+     * @param log               logger.
+     * @return list of paths to the bucket files.
+     */
+    private List<Path> sortLocalizationFilesIntoBuckets(List<Path> localizationFiles, Path outputDirectory, Function<Localization, Long> hasher, int numBuckets, Logger log) throws Exception {
+        log.debug(String.format("Sorting %,d localization files into %,d buckets in directory %s", localizationFiles.size(), numBuckets, outputDirectory));
+
+        // TODO: make this try-with-resources
+        // TODO: class for writing
+        // TODO: parallelize
+
+        // Create buckets: path and Kryo output for each
+
+        Kryo kryo = getKryo();
+        // TODO: this logic to Buckets class?
+        try (Buckets buckets = new Buckets(outputDirectory, numBuckets)) {
+            for (int i = 0; i < localizationFiles.size(); i++) {
+                final Path path = localizationFiles.get(i);
+                try (Input input = KryoUtils.createInput(path)) {
+                    log.debug(String.format("Sorting file %s (%,d/%,d)", path, i, localizationFiles.size()));
+                    long counter = 0;
+                    while (!input.end()) {
+                        final Localization localization = kryo.readObject(input, Localization.class);
+                        buckets.write(localization, hasher);
+                        counter++;
+                    }
+                    log.debug(String.format("Sorted %,d localizations for file %s", counter, path));
+                } catch (IOException e) {
+                    throw new RuntimeException("error creating Kryo input for file " + path + ": " + e);
+                }
+            }
+
+            log.debug(String.format("Sorted %,d localizations across %,d input files into %,d buckets", buckets.getNumLocalizations(), localizationFiles.size(), numBuckets));
+            return buckets.getPaths();
+        }
+    }
+
+    /**
+     * Entry point for stage 2: sorting localization files into buckets keyed by relationship ID.
+     *
+     * @param localizationFiles list of localization files to bucket.
+     * @param outputDirectory   output directory for buckets, must exist.
+     * @param numBuckets        number of buckets.
+     * @param log               logger.
+     * @return list of paths to buckets.
+     */
+    private List<Path> sortIntoLocalizationOffsetsBuckets(List<Path> localizationFiles, Path outputDirectory, int numBuckets, Logger log) throws Exception {
+        // TODO: hashing function to separate class or more descriptive method name
+        return sortLocalizationFilesIntoBuckets(
+            localizationFiles,
+            outputDirectory,
+            Localization::getRelationshipId,
+            numBuckets,
+            log
+        );
+    }
+
+    /**
+     * Entry point for stage 3: write localization bucket files to pangenome database. Buckets are read in parallel,
+     * sorted in-memory and written out to Neo4j with the specified level of parallelism.
+     *
+     * @param localizationBuckets paths to localization files generated in stage 2.
+     * @param numThreads          number of threads to write to Neo4j.
+     * @param log                 logger.
+     */
+    private void writeLocalizationOffsetsToDatabase(List<Path> localizationBuckets, int numThreads, Logger log) throws Exception {
+        log.info(String.format("Maximum transaction size: %,d", transactionSize));
+        log.info("Num. threads: " + numThreads);
+
+        final List<LocalizationsWriterTask> tasks = localizationBuckets
+            .stream()
+            .map(path -> new LocalizationsWriterTask(path, log, transactionSize))
+            .collect(Collectors.toList());
+
+        runParallel(tasks, numThreads, log);
+    }
+
+    /**
+     * Entry point for stage 4: sorting the localization files from stage 1 into a number of buckets (files) by node to
+     * derive node-genome frequencies.
+     *
+     * @param localizationFiles list of localization files to bucket.
+     * @param outputDirectory   output directory for buckets, must exist.
+     * @param numBuckets        number of buckets.
+     * @param log               logger.
+     * @return list of paths to buckets.
+     */
+    private List<Path> sortIntoNodeBuckets(List<Path> localizationFiles, Path outputDirectory, int numBuckets, Logger log) throws Exception {
+        return sortLocalizationFilesIntoBuckets(
+            localizationFiles,
+            outputDirectory,
+            Localization::getEndNodeId,
+            numBuckets,
+            log
+        );
+    }
+
+    /**
+     * Entry point for stage 5: writing out the contents of the buckets from stage 4 to the nodes in the pangenome
+     * graph.
+     *
+     * @param nodeBuckets paths to localization files generated in stage 4.
+     * @param numThreads  number of threads to write to Neo4j.
+     * @param log         logger.
+     * @return highest genome frequency observed across all nucleotide nodes.
+     */
+    private long writeGenomeFrequenciesToDatabase(List<Path> nodeBuckets, int numThreads, Logger log) throws Exception {
+        final long maximumTransactionSize = transactionSize;
+        log.info(String.format("Maximum transaction size: %,d", maximumTransactionSize));
+        log.info("Num. threads: " + numThreads);
+
+        final List<NodeFrequenciesWriterTask> tasks = nodeBuckets
+            .stream()
+            .map(path -> new NodeFrequenciesWriterTask(path, log, maximumTransactionSize))
+            .collect(Collectors.toList());
+
+        final AtomicLong highestFrequency = new AtomicLong(-1);
+        runParallel(tasks, numThreads, log, frequency -> {
+            if (frequency > highestFrequency.get())
+                highestFrequency.set(frequency);
+        });
+
+        assert highestFrequency.get() != -1;
+        return highestFrequency.get();
+    }
+
+    /**
+     * Run a collection of tasks with the specified level of parallelism.
+     *
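+     * <p>
+     * Illustrative use, with hypothetical tasks that each return a count:
+     * <pre>{@code
+     * List<Callable<Long>> tasks = Arrays.asList(() -> 1L, () -> 2L);
+     * AtomicLong total = new AtomicLong();
+     * List<Long> results = runParallel(tasks, 2, log, total::addAndGet);
+     * // total.get() == 3 once both tasks have completed
+     * }</pre>
+     *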
+     * @param tasks      collection of callables returning a value.
+     * @param numThreads number of threads in the thread pool.
+     * @param log        logger.
+     * @param callback   method to call with each result, as they come in.
+     * @param <T>        type of return value for each task.
+     * @return list of return values, order does not correspond to that of the tasks.
+     * TODO: document use of callback vs return value
+     * TODO: re-use thread pool
+     */
+    private <T> List<T> runParallel(Collection<? extends Callable<T>> tasks, int numThreads, Logger log, Consumer<T> callback) throws Exception {
+        log.debug("Creating completion service on executor service with fixed thread pool of size " + numThreads);
+
+        // TODO: make thread pool configurable (naming threads might be useful for monitoring/troubleshooting)
+        final ExecutorService es = Executors.newFixedThreadPool(numThreads);
+        final CompletionService<T> cs = new ExecutorCompletionService<>(es);
+
+        final List<Future<T>> ts =
+            tasks
+                .stream()
+                .map(cs::submit)
+                .collect(Collectors.toList());
+
+        log.debug(String.format("Submitted %,d tasks", tasks.size()));
+
+        final List<T> outputs = new ArrayList<>(tasks.size());
+
+        // TODO: use try-with-resources?
+        boolean terminatedWithError = false;
+        Exception exception = null;
+        // TODO: to method
+        for (int i = 0; i < tasks.size(); i++) {
+            try {
+                final T result = cs.take().get();
+                outputs.add(result);
+                callback.accept(result);
+            } catch (InterruptedException e) {
+                log.error("Interrupted while waiting for tasks to finish: " + e);
+                terminatedWithError = true;
+                exception = e;
+                break;
+            } catch (ExecutionException e) {
+                // TODO: show which node localization failed
+                log.error("Error retrieving result for task: " + e);
+                terminatedWithError = true;
+                exception = e;
+                break;
+            }
+        }
+
+        // TODO: fix cleanup logic
+        if (terminatedWithError) {
+            log.info("Error(s) occurred during task processing. Cancelling all tasks");
+            // TODO: show cancel progress?
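+            // cancel(true) interrupts tasks that are already running; tasks that have not started yet will never run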
+            for (Future<T> task : ts)
+                task.cancel(true);
+        }
+
+        log.info("Shutting down executor service. Waiting for " + DEFAULT_EXECUTOR_SERVICE_SHUTDOWN_TIMEOUT_SECONDS + " seconds");
+        es.shutdown();
+        try {
+            // TODO: make configurable
+            if (!es.awaitTermination(DEFAULT_EXECUTOR_SERVICE_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS))
+                log.warn("Timeout elapsed before executor service was shut down");
+        } catch (InterruptedException e) {
+            throw new RuntimeException("interrupted while waiting on executor service to shut down: " + e);
+        }
+        log.debug("Executor service is shut down");
+
+        if (terminatedWithError)
+            throw exception;
+
+        return outputs;
+    }
+
+    /**
+     * Wrapper method for {@link LocalizeNodesParallel#runParallel(Collection, int, Logger, Consumer)} without a
+     * callback.
+     *
+     * @param tasks      collection of callables returning a value.
+     * @param numThreads number of threads in the thread pool.
+     * @param log        logger.
+     * @param <T>        type of return value for each task.
+     * @return list of return values, order does not correspond to that of the tasks.
+     */
+    private <T> List<T> runParallel(Collection<? extends Callable<T>> tasks, int numThreads, Logger log) throws Exception {
+        return runParallel(tasks, numThreads, log, t -> {
+        });
+    }
+
+    /**
+     * Clean up files by removing them. Directories housing the files are not removed.
+     *
+     * @param filesToCleanUp list of files to remove.
+     * @param log            logger to display metrics and progress information.
+     */
+    private void cleanup(List<Path> filesToCleanUp, Logger log) throws IOException {
+        // TODO: remove directories as well
+        long totalBytesCleanedUp = 0;
+
+        log.info("Cleaning up " + filesToCleanUp.size() + " intermediate files");
+        for (int i = 0; i < filesToCleanUp.size(); i++) {
+            final Path path = filesToCleanUp.get(i);
+            totalBytesCleanedUp += Files.size(path);
+
+            log.debug(String.format("Deleting file %s (%,d/%,d)", path, i + 1, filesToCleanUp.size()));
+            Files.delete(path);
+        }
+
+        final String totalBytesCleanedUpHumanReadable = FileUtils.byteCountToDisplaySize(totalBytesCleanedUp);
+        log.info(String.format("Cleaned up %,d bytes (%s) across %,d files", totalBytesCleanedUp, totalBytesCleanedUpHumanReadable, filesToCleanUp.size()));
+    }
+}
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/parallel/NodeFrequenciesWriterTask.java b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/NodeFrequenciesWriterTask.java
new file mode 100644
index 0000000000000000000000000000000000000000..e3b0e45047cb19a251bc4c001aec9a739c30dc26
--- /dev/null
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/NodeFrequenciesWriterTask.java
@@ -0,0 +1,118 @@
+package nl.wur.bif.pantools.pangenome.parallel;
+
+import com.esotericsoftware.kryo.kryo5.Kryo;
+import com.esotericsoftware.kryo.kryo5.io.Input;
+import org.apache.logging.log4j.Logger;
+import org.neo4j.graphdb.Node;
+import org.neo4j.graphdb.Transaction;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Callable;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.stream.Collectors;
+
+import static nl.wur.bif.pantools.pangenome.parallel.KryoUtils.getKryo;
+import static nl.wur.bif.pantools.utils.Globals.GRAPH_DB;
+import static nl.wur.bif.pantools.utils.Globals.genomeDb;
+
+/**
+ * Task for writing out genome frequency information to nucleotide nodes. This task reads a list of {@link Localization}
+ * items from a file, groups them by node ID, and counts per genome how often each node occurs, before writing out
+ * three properties to the node:
+ * <br/>
+ * 1. `frequencies`: an array counting how often this node occurs in each genome (index 0 is unused and stays zero,
+ *    since genome indices are 1-based);
+ * 2. `frequency`: the sum of the `frequencies` array;
+ * 3. `high_frequency`: a flag (set to true only, never to false) indicating that the node occurs with high frequency.
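+ * <br/>
+ * For example (with illustrative numbers): given three genomes and localizations that place a node twice in genome 1
+ * and once in genome 3, {@code frequencies} would be {@code [0, 2, 0, 1]} and {@code frequency} would be 3.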
+ */
+public class NodeFrequenciesWriterTask implements Callable<Long> {
+    private final Path path;
+    private final Logger log;
+    private final long maximumBatchSize;
+
+    // TODO: refactor to base task for adding entries to commit
+    public NodeFrequenciesWriterTask(Path path, Logger log, long maximumBatchSize) {
+        this.path = path;
+        this.log = log;
+        this.maximumBatchSize = maximumBatchSize;
+    }
+
+    @Override
+    public Long call() throws Exception {
+        final Transaction[] tx = {GRAPH_DB.beginTx()};
+
+        final AtomicLong highestFrequency = new AtomicLong();
+        final AtomicLong counter = new AtomicLong(0);
+        try {
+            final List<Localization> localizations = loadLocalizations(path, getKryo());
+            log.debug(String.format("Loaded %,d localizations from file %s", localizations.size(), path));
+
+            if (localizations.isEmpty())
+                return 0L;
+
+            // TODO: optimize memory by looping
+            final Map<Long, List<Localization>> grouped = localizations
+                .stream()
+                .collect(Collectors.groupingBy(Localization::getEndNodeId));
+
+            grouped.forEach((nodeId, ls) -> {
+                // long[] is zero-initialized on allocation; index 0 stays unused (genome indices are 1-based)
+                final long[] frequencies = new long[genomeDb.num_genomes + 1];
+
+                for (Localization l : ls)
+                    frequencies[l.getGenomeIndex()]++;
+
+                final long frequency = Arrays.stream(frequencies).sum();
+
+                if (frequency > highestFrequency.get())
+                    highestFrequency.set(frequency);
+
+                final Node node = GRAPH_DB.getNodeById(nodeId);
+                node.setProperty("frequencies", frequencies);
+                node.setProperty("frequency", frequency);
+                if (frequency >= genomeDb.num_genomes * 100L) {
+                    // TODO: do we clear a previously set high_frequency property if this is not a high frequency node?
+                    node.setProperty("high_frequency", true);
+                }
+
+                counter.getAndIncrement();
+                if (counter.get() % maximumBatchSize == 0) {
+                    log.trace(String.format("Committing batch of size %,d", maximumBatchSize));
+                    tx[0].success();
+                    tx[0].close();
+                    tx[0] = GRAPH_DB.beginTx();
+                }
+            });
+        } catch (IOException e) {
+            throw new RuntimeException("error creating Kryo input for path " + path + ": " + e);
+        } finally {
+            // TODO: debug message for last batch
+            tx[0].success();
+            tx[0].close();
+        }
+        log.debug(String.format("Written %,d properties for bucket %s", counter.get(), path));
+
+        // TODO: return more involved metrics
+        return highestFrequency.get();
+    }
+
+    // TODO: remove duplicate code
+    public List<Localization> loadLocalizations(Path path, Kryo kryo) throws IOException {
+        final List<Localization> updates = new ArrayList<>();
+
+        try (final Input input = KryoUtils.createInput(path)) {
+            while (!input.end())
+                updates.add(kryo.readObject(input, Localization.class));
+        }
+
+        return updates;
+    }
+}
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/parallel/NodeLocalizationTask.java b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/NodeLocalizationTask.java
new file mode 100644
index 0000000000000000000000000000000000000000..3727ab571dbfeaa0f645e051d40766c0965ac31f
--- /dev/null
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/NodeLocalizationTask.java
@@ -0,0 +1,176 @@
+package nl.wur.bif.pantools.pangenome.parallel;
+
+import com.esotericsoftware.kryo.kryo5.Kryo;
+import com.esotericsoftware.kryo.kryo5.io.Output;
+import com.github.benmanes.caffeine.cache.Cache;
+import nl.wur.bif.pantools.sequence.SequenceScanner;
+import org.apache.logging.log4j.Logger;
+import org.neo4j.graphdb.Direction;
+import org.neo4j.graphdb.Node;
+import org.neo4j.graphdb.Relationship;
+import org.neo4j.graphdb.Transaction;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.concurrent.Callable;
+
+import static nl.wur.bif.pantools.pangenome.parallel.KryoUtils.getKryo;
+import static nl.wur.bif.pantools.utils.Globals.*;
+
+public class NodeLocalizationTask implements Callable<Path> {
+    private final Node sequenceNode;
+    private final Path outputDirectory;
+    private final Cache<Long, NodeProperties> cache;
+    private final Logger log;
+    private final SequenceScanner genomeSc;
+
+    /**
+     * Create a task that localizes a sequence node. The task follows the sequence node through its nucleotide and
+     * degenerate nodes, writes localization information to an output file in the given output directory, and writes
+     * anchor information (node IDs, node sides and anchor sides) to the sequence node.
+     *
+     * @param sequenceNode    sequence node to localize.
+     * @param outputDirectory directory to write the output file to.
+     * @param cache           cache with node properties to speed up localization.
+     * @param log             logger.
+     */
+    public NodeLocalizationTask(Node sequenceNode, Path outputDirectory, Cache<Long, NodeProperties> cache, Logger log) {
+        this.sequenceNode = sequenceNode;
+        this.outputDirectory = outputDirectory;
+        this.cache = cache;
+        this.log = log;
+        this.genomeSc = new SequenceScanner(genomeDb, 1, 1, K_SIZE, indexSc.get_pre_len());
+    }
+
+    /**
+     * Localize a sequence node. This method will follow the sequence node through its nucleotide and degenerate nodes, and
+     * write localization information to an output file whose path will be returned by this function. In addition, this
+     * method will write anchor information (node IDs, node sides and anchor sides) to the sequence node at the end of
+     * the localization process.
+     *
+     * @return path to the output file with {@link nl.wur.bif.pantools.pangenome.parallel.Localization} objects.
+     */
+    @Override
+    public Path call() {
+        // Adapted from GenomeLayer::localize_nodes()
+
+        final Kryo kryo = getKryo();
+        int neighbor_length = 0;
+        char node_side, neighbor_side;
+        long length, distance;
+        Node node, neighbor;
+        String rel_name, origin;
+        int[] address = new int[3], addr = null;
+        boolean found = true;
+        final Anchors anchors = new Anchors();
+
+        try (Transaction ignored = GRAPH_DB.beginTx()) {
+            origin = "G" + ((String) sequenceNode.getProperty("identifier")).replace('_', 'S');
+        }
+
+        final Path path = outputDirectory.resolve("localizations-" + origin + ".kryo.xz");
+
+        try (Transaction tx = GRAPH_DB.beginTx(); Output output = KryoUtils.createOutput(path)) {
+            address[0] = (int) sequenceNode.getProperty("genome");
+            address[1] = (int) sequenceNode.getProperty("number");
+            length = (long) sequenceNode.getProperty("length");
+
+            log.debug(String.format("Localizing node %s of length %,d (ID: %d)", origin, length, sequenceNode.getId()));
+            log.debug("Writing to output file " + path);
+
+            if (length >= K_SIZE) {
+                node = sequenceNode;
+                node_side = 'F';
+                distance = 0;
+
+                long previousTimestamp = System.currentTimeMillis();
+                for (address[2] = 0; address[2] + K_SIZE - 1 < length && found; ) { // K-1 bases of the last node not added
+                    found = false;
+
+                    for (Relationship r : node.getRelationships(Direction.OUTGOING)) {
+                        rel_name = r.getType().name();
+
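+                        // Relationship type names encode sides (e.g. "FF", "FR"): the first character is the side of
+                        // the current node, the second the side of the neighbor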
+                        if (rel_name.charAt(0) != node_side)
+                            continue;
+
+                        neighbor = r.getEndNode();
+
+                        neighbor_side = rel_name.charAt(1);
+                        final boolean is_nucleotide = neighbor.hasLabel(nucleotide_label);
+                        final boolean is_degenerate = neighbor.hasLabel(degenerate_label);
+                        final boolean is_node = is_nucleotide && !is_degenerate;
+
+                        if (is_node || is_degenerate) {
+                            final Node finalNeighbor = neighbor;
+                            NodeProperties nodeProperties = cache.get(
+                                neighbor.getId(),
+                                k -> new NodeProperties(
+                                    (int[]) finalNeighbor.getProperty("address"),
+                                    (int) finalNeighbor.getProperty("length")
+                                )
+                            );
+
+                            assert nodeProperties != null;
+                            addr = nodeProperties.getAddress();
+                            neighbor_length = nodeProperties.getLength();
+                        }
+
+                        if ((is_node && genomeSc.compare(address, addr, K_SIZE - 1,
+                            neighbor_side == 'F' ? K_SIZE - 1 : neighbor_length - K_SIZE, 1, neighbor_side == 'F'))
+                            || (is_degenerate && Arrays.equals(addr, address))) {
+                            found = true;
+
+                            // Write localization
+
+                            kryo.writeObject(output, new Localization(
+                                r.getId(),
+                                address[0],
+                                address[1],
+                                address[2],
+                                r.getEndNodeId()
+                            ));
+
+                            // Update node anchors
+
+                            if (address[2] >= distance) {
+                                anchors.add(neighbor.getId(), address[2], neighbor_side);
+                                distance += ANCHORS_DISTANCE;
+                            }
+
+                            // Display progress every once in a while
+
+                            final long currentTimestamp = System.currentTimeMillis();
+                            if ((currentTimestamp - previousTimestamp) > 10000) {
+                                final String progress = String.format("%.1f", (100 * (float) address[2]) / ((float) length));
+                                log.info(String.format("At position %,d/%,d (%s%%) for node %s", address[2], length, progress, origin));
+
+                                previousTimestamp = currentTimestamp;
+                            }
+
+                            address[2] = address[2] + neighbor_length - K_SIZE + 1;
+                            node = neighbor;
+                            node_side = neighbor_side;
+                            break;
+                        }
+                    }
+                }
+                if (!found)
+                    throw new RuntimeException("cannot locate position " + address[2] + " at node with ID " + node.getId());
+
+                // Store anchors as node property
+
+                log.info(String.format("Updating node %s with %,d anchors", origin, anchors.size()));
+                sequenceNode.setProperty("anchor_nodes", anchors.getNodeIds());
+                sequenceNode.setProperty("anchor_positions", anchors.getPositions());
+                sequenceNode.setProperty("anchor_sides", anchors.getSides());
+            }
+            tx.success();
+        } catch (IOException e) {
+            // Thrown by KryoUtils.createOutput
+            throw new RuntimeException("cannot create output file at path " + path + ": " + e);
+        }
+
+        return path;
+    }
+}
diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/parallel/NodeProperties.java b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/NodeProperties.java
new file mode 100644
index 0000000000000000000000000000000000000000..628ecd120c1a02aa1baa1bb497a65c17ca1a46c6
--- /dev/null
+++ b/src/main/java/nl/wur/bif/pantools/pangenome/parallel/NodeProperties.java
@@ -0,0 +1,33 @@
+package nl.wur.bif.pantools.pangenome.parallel;
+
+import java.util.Arrays;
+
+/**
+ * Class for storing length and address of a nucleotide or degenerate node, used during localization phase. Address is
+ * given as an integer array of [genome index, sequence index, base pair offset].
+ */
+public class NodeProperties {
+    private final int[] address;
+    private final int length;
+
+    public NodeProperties(int[] address, int length) {
+        this.address = address;
+        this.length = length;
+    }
+
+    public int[] getAddress() {
+        return address;
+    }
+
+    public int getLength() {
+        return length;
+    }
+
+    @Override
+    public String toString() {
+        return "NodeProperties{" +
+            "address=" + Arrays.toString(address) +
+            ", length=" + length +
+            '}';
+    }
+}
diff --git a/src/main/java/nl/wur/bif/pantools/pantools/Pantools.java b/src/main/java/nl/wur/bif/pantools/pantools/Pantools.java
index ad48c16dc10ae59cebb5bcea8af60c429afe5c19..8d3d8b676cba9e8644d67ab3fb3e74b862d20373 100644
--- a/src/main/java/nl/wur/bif/pantools/pantools/Pantools.java
+++ b/src/main/java/nl/wur/bif/pantools/pantools/Pantools.java
@@ -6,23 +6,29 @@
 
 package nl.wur.bif.pantools.pantools;
 
+
 import nl.wur.bif.pantools.cli.Map;
 import nl.wur.bif.pantools.cli.*;
 import nl.wur.bif.pantools.pangenome.PangenomeGraph;
+import nl.wur.bif.pantools.utils.ConsoleInput;
 import nl.wur.bif.pantools.utils.Globals;
 import org.apache.commons.io.FileUtils;
-import org.apache.commons.lang3.RandomStringUtils;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.neo4j.unsafe.impl.batchimport.input.DataException;
 import picocli.CommandLine;
 import picocli.CommandLine.*;
+import picocli.CommandLine.Model.CommandSpec;
 
 import java.awt.*;
-import java.io.*;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URL;
+import java.nio.file.FileAlreadyExistsException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
@@ -59,7 +65,7 @@ import static picocli.CommandLine.ScopeType.INHERIT;
         RemoveAnnotations.class,
         RemoveFunctions.class,
         RemoveNodes.class,
-        RemovePhenotype.class,
+        RemovePhenotypes.class,
         RemoveGrouping.class,
         MoveGrouping.class,
         Group.class,
@@ -95,7 +101,8 @@ import static picocli.CommandLine.ScopeType.INHERIT;
         RetrieveFeatures.class,
         RetrieveRegions.class,
         MSA.class,
-        Map.class
+        Map.class,
+        ExportPangenome.class
 })
 public class Pantools {
     @Parameters(descriptionKey = "database-path", index = "0", scope = INHERIT)
@@ -107,12 +114,18 @@ public class Pantools {
     @Option(names = "--no-input", hidden = true, negatable = true, scope = INHERIT)
     boolean input;
 
+    @Option(names = {"--trace"}, hidden = true, scope = INHERIT)
+    boolean trace;
+
     @Option(names = {"-d", "--debug"}, hidden = true, scope = INHERIT)
     boolean debug;
 
     @Option(names = {"-q", "--quiet"}, hidden = true, scope = INHERIT)
     boolean quiet;
 
+    @Option(names = {"--silent"}, hidden = true, scope = INHERIT)
+    boolean silent;
+
     @Option(names = {"-V", "--version"}, versionHelp = true, hidden = true)
     boolean versionInfoRequested;
 
@@ -122,8 +135,7 @@ public class Pantools {
     @Option(names = {"-M", "--manual"}, help = true, hidden = true, scope = INHERIT)
     boolean manualRequested;
 
-    private static Logger usageLogger;
-
+    public static Logger logger;
     private PangenomeGraph pangenomeGraph;
 
     /**
@@ -180,69 +192,34 @@ public class Pantools {
             return browseManual(parseResult);
         }
         if (parseResult.hasSubcommand() && !usageHelpRequested) {
-            log4jSetup();
-            usageLogger = LogManager.getLogger(Pantools.class);
-            usageLogger.info("Usage: pantools " + String.join(" ", parseResult.originalArgs()));
             setGlobals(this);
         }
         return new CommandLine.RunLast().execute(parseResult);
     }
 
     /**
-     * Setup function for system properties used in log4j2.properties.
-     */
-    private void log4jSetup() {
-        setLog4jLevel();
-        setLog4jSaveDirectory();
-    }
-
-    /**
-     * Set the logging level for log4j console output.
-     * Quit mode only shows messages of warning level or higher.
-     * Debug mode shows messages from debug and up.
-     */
-    private void setLog4jLevel() {
-        final String level = quiet ? "warn" : debug ? "debug" : "info";
-        System.setProperty("log4j2.level", level);
-    }
-
-    /**
-     * Set the "log" directory for all logging files.
-     * If the directory does not exist, a temporary directory is created.
+     * Setup function for the log4j2 logger and the system properties used in log4j2.properties.
+     * The log file is named after the called subcommand, with a timestamp.
+     * The log level is set according to the user input.
+     * The log folder is located in the database directory.
+     * @param spec CommandSpec containing subcommand-specific information
+     * @exception ParameterException thrown when the given database directory does not exist or is not a directory
      */
-    private void setLog4jSaveDirectory() {
-        final Path logDirectory = databaseDirectory.resolve("log");
-        if (!Files.isDirectory(logDirectory)) {
-            try {
-                final Path tempDirectory = Files.createTempDirectory(RandomStringUtils.randomAlphanumeric(10));
-                moveLogFilesOnExit(tempDirectory, logDirectory);
-                System.setProperty("log4j.saveDirectory", tempDirectory.toString());
-            } catch (IOException e) {
-                e.printStackTrace();
-            }
-        } else {
-            System.setProperty("log4j.saveDirectory", logDirectory.toString());
+    public void createLogger(CommandSpec spec) throws ParameterException {
+        // validate database directory before creating a log directory
+        if (!Files.exists(databaseDirectory) || !Files.isDirectory(databaseDirectory)) {
+            throw new ParameterException(spec.commandLine(), "Pangenome database directory does not exist.");
         }
-    }
 
-    /**
-     * Add a shutdown hook to a temporary log directory to remove it on system exit and move all files in it to a
-     * permanent directory, if created.
-     *
-     * @param tempDirectory path of temporary directory
-     * @param logDirectory path of the standard log directory
-     */
-    private void moveLogFilesOnExit(Path tempDirectory, Path logDirectory) {
-        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
-            try {
-                if (Files.isDirectory(logDirectory)) {
-                    FileUtils.copyDirectory(tempDirectory.toFile(), logDirectory.toFile());
-                }
-                FileUtils.deleteDirectory(tempDirectory.toFile());
-            } catch (IOException ex) {
-                ex.printStackTrace();
-            }
-        }));
+        // set system properties for the log4j2 configuration file
+        final String logLevel = silent ? "error" : quiet ? "warn" : debug ? "debug" : trace ? "trace" : "info";
+        System.setProperty("log4j2.level", logLevel);
+        System.setProperty("log4j2.saveDirectory", String.valueOf(databaseDirectory.resolve("logs")));
+        System.setProperty("log4j2.logFile", spec.name());
+
+        // create logger
+        logger = LogManager.getLogger(this);
+        logger.info("Usage: pantools {}", String.join(" ", spec.commandLine().getParseResult().originalArgs()));
     }
 
     /**
@@ -305,14 +282,14 @@ public class Pantools {
 
     /**
      * Browse the default web browser to open the URL page
-     * @param url page to be opened
+     * @param urlString page to be opened
      */
-    private void browse(String url) {
+    private void browse(String urlString) {
         try {
             if (Desktop.isDesktopSupported()) {
                 Desktop desktop = Desktop.getDesktop();
                 if (desktop.isSupported(Desktop.Action.BROWSE)) {
-                    desktop.browse(URI.create(url));
+                    desktop.browse(URI.create(urlString));
                 }
             }
         } catch (IOException | InternalError e) {
@@ -325,9 +302,9 @@ public class Pantools {
      * Adds time of execution, command line arguments, run time and exit code to the usage log.
      */
     private void exitStrategy(Instant startTime) {
-        if (!(usageHelpRequested || versionInfoRequested || manualRequested || usageLogger == null)) {
+        if (!(usageHelpRequested || versionInfoRequested || manualRequested || logger == null)) {
             disconnectPangenome();
-            usageLogger.info(String.format("Run time: %s\n", getRunTime(startTime)));
+            logger.info(String.format("Run time: %s\n", getRunTime(startTime)));
         }
     }
 
@@ -393,7 +370,7 @@ public class Pantools {
             sectionHeading = help.createHeading(String.format("%n@|bold,underline %s|@%n", sectionHeading));
 
             for (CommandLine command : subcommands) {
-                final Model.CommandSpec commandSpec = command.getCommandSpec();
+                final CommandSpec commandSpec = command.getCommandSpec();
 
                 // create comma-separated list of command name and aliases
                 String names = commandSpec.names().toString();
@@ -418,7 +395,7 @@ public class Pantools {
          * @return TextTable class containing help message layout for the commands section
          */
         private Help.TextTable createTextTable(Help help) {
-            Model.CommandSpec spec = help.commandSpec();
+            CommandSpec spec = help.commandSpec();
             // prepare layout: two columns
             // the left column overflows, the right column wraps if text is too long
             int commandLength = maxLength(spec.subcommands());
@@ -477,9 +454,51 @@ public class Pantools {
         }
     }
 
+    /**
+     * Validates the given database directory path and creates the database directory if needed.
+     * @throws IOException when the database directory is not valid
+     */
+    public void createDatabaseDirectory() throws IOException {
+        validateDirectoryPath();
+        Files.createDirectories(databaseDirectory);
+    }
+
+    /**
+     * Validates the database directory path for a new directory.
+     * Cleans the directory if it contains files, considering user input.
+     * @throws IOException when the database directory contains files or is not a directory
+     */
+    private void validateDirectoryPath() throws IOException {
+        // check if the database already exists and is a directory
+        if (!Files.exists(databaseDirectory)) return;
+        if (!Files.isDirectory(databaseDirectory)) {
+            throw new IllegalArgumentException("Given database directory is not a directory.");
+        }
+
+        // if the directory is empty or --force is given, overwrite
+        if (force || Objects.requireNonNull(databaseDirectory.toFile().list()).length == 0) {
+            FileUtils.deleteDirectory(databaseDirectory.toFile());
+            return;
+        }
+
+        // Ask for user input to decide whether to overwrite the directory or not
+        if (input) {
+            final ConsoleInput consoleInput = new ConsoleInput();
+            final String query = String.format("<%s> already exists and contains files, do you want to overwrite it?",
+                    databaseDirectory);
+            if (consoleInput.askYesOrNo(query)) {
+                FileUtils.deleteDirectory(databaseDirectory.toFile());
+                return;
+            }
+        }
+        throw new FileAlreadyExistsException("Given database directory already exists and contains files");
+    }
+
     public void setPangenomeGraph() {
         this.setPangenomeGraph("any");
     }
+
     public void setPangenomeGraph(String type) {
         pangenomeGraph = new PangenomeGraph(databaseDirectory);
         if (type.matches("pangenome") && !pangenomeGraph.isPangenome()) {
@@ -491,6 +510,9 @@ public class Pantools {
         Globals.GRAPH_DB = pangenomeGraph.getGraphDb();
     }
 
+    public Logger getLogger() {
+        return logger;
+    }
     public PangenomeGraph pangenomeGraph() {
         return pangenomeGraph;
     }
diff --git a/src/main/java/nl/wur/bif/pantools/utils/Globals.java b/src/main/java/nl/wur/bif/pantools/utils/Globals.java
index 1c8503f496ad19f87289615089c8bc4d126714c2..636f835fa63a7f3192ac33f13ed1cdea5ef90f52 100644
--- a/src/main/java/nl/wur/bif/pantools/utils/Globals.java
+++ b/src/main/java/nl/wur/bif/pantools/utils/Globals.java
@@ -158,7 +158,6 @@ public class Globals {
     public static String SELECTED_LABEL;
     //    public static String NODE_PROPERTY;
     public static String NODE_VALUE;
-    public static String BINS;
     public static String SELECTED_HMGROUPS;
     public static String INPUT_FILE;
     public static int grouping_version = -1;
@@ -284,13 +283,11 @@ public class Globals {
             path_array = full_path.split("/dist/");
         } else {
             //path_array = new String[0];
-            System.out.println("No .jar file found in " + full_path + "/dist/ or " + full_path + "target/");
-            return;
+            throw new RuntimeException("No .jar file found in " + full_path + "dist/ or " + full_path + "target/");
         }
 
         if (!path_array[0].endsWith("/pantools")) {
-            System.out.println("PanTools' .jar file must be located inside the pantools/dist/ or pantools/target/ directories.");
-            System.exit(1);
+            throw new RuntimeException("PanTools' .jar file must be located inside the pantools/dist/ or pantools/target/ directories.");
         }
         pantools_path = path_array[0] + "/";
     }
diff --git a/src/main/java/nl/wur/bif/pantools/utils/StringParser.java b/src/main/java/nl/wur/bif/pantools/utils/StringParser.java
index 65581b8f16d752bf32de801c7bfde734574c7fda..a64ca67ce304fb925e7db6c179b68c5fd09833ba 100644
--- a/src/main/java/nl/wur/bif/pantools/utils/StringParser.java
+++ b/src/main/java/nl/wur/bif/pantools/utils/StringParser.java
@@ -36,6 +36,30 @@ public class StringParser {
         return Arrays.stream(value.split(",")).map(Integer::parseInt).collect(Collectors.toList());
     }
 
+    /**
+     * Converts a string of comma-separated integers to a list of longs. Returns an empty list if the input cannot
+     * be converted.
+     *
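+     * <p>
+     * For example, {@code stringToLongList("1,2,3")} yields {@code [1, 2, 3]}, while {@code stringToLongList("1,,3")}
+     * and {@code stringToLongList(",1")} yield an empty list.
+     *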
+     * @param value input string
+     * @return list of longs
+     */
+    public static List<Long> stringToLongList(String value) {
+        if (!value.matches("[0-9,]+") || value.contains(",,") || value.matches(",.*")) {
+            return Collections.emptyList();
+        } else {
+            return parseStringToLongList(value);
+        }
+    }
+
+    /**
+     * Converts a string of comma-separated integers to a list of longs.
+     * @param value string of comma-separated integer values
+     * @return list of longs
+     */
+    private static List<Long> parseStringToLongList(String value) {
+        return Arrays.stream(value.split(",")).map(Long::parseLong).collect(Collectors.toList());
+    }
+
     /**
      * Converts a string of two integer values separated by a "-" into a list of integers containing the full range
      * including and between both integers.
diff --git a/src/main/java/nl/wur/bif/pantools/utils/Utils.java b/src/main/java/nl/wur/bif/pantools/utils/Utils.java
index 020bad51d1be36f21919ffb919855e865ccb9fa3..8138726c28a0b00f1b12f41ab3fa242d82f4f042 100644
--- a/src/main/java/nl/wur/bif/pantools/utils/Utils.java
+++ b/src/main/java/nl/wur/bif/pantools/utils/Utils.java
@@ -22,7 +22,6 @@ import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.*;
 import java.util.concurrent.TimeUnit;
-import java.util.logging.*;
 
 import static nl.wur.bif.pantools.pangenome.GenomeLayer.*;
 import static nl.wur.bif.pantools.utils.Globals.*;
@@ -45,21 +44,21 @@ public class Utils {
      */
     public static void connect_pangenome() {
         if (WORKING_DIRECTORY == null) {
-            System.out.println("No database was provided (-dp)\n");
+            Pantools.logger.error("No database directory was provided!");
             System.exit(1);
         }
         Scanner s;
         String str;
         if (new File(WORKING_DIRECTORY).exists()) {
             if (! new File(WORKING_DIRECTORY + GRAPH_DATABASE_PATH).exists()) {
-                System.out.println("No graph database found at " + WORKING_DIRECTORY);
+                Pantools.logger.error("No graph neo4j database found at {}", WORKING_DIRECTORY);
                 System.exit(1);
             }
             if (! new File(WORKING_DIRECTORY + INDEX_DATABASE_PATH).exists()) {
-                System.out.println("No index database found at -> " + WORKING_DIRECTORY + INDEX_DATABASE_PATH);
+                Pantools.logger.error("No index neo4j database found at {}", WORKING_DIRECTORY + INDEX_DATABASE_PATH);
                 System.exit(1);
             }
-            System.out.print("\rStarting the pangenome database");
+            Pantools.logger.info("Starting the pangenome database.");
             GRAPH_DB = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(
                             new File(WORKING_DIRECTORY + GRAPH_DATABASE_PATH))
                     .setConfig(GraphDatabaseSettings.keep_logical_logs, "4 files")
@@ -68,7 +67,7 @@ public class Utils {
             indexDb = new IndexDatabase(WORKING_DIRECTORY + INDEX_DATABASE_PATH, "sorted");
             if (! new File(WORKING_DIRECTORY + GENOME_DATABASE_PATH).exists()) {
                 s = new Scanner(System.in);
-                System.out.println("No genome database found at " + WORKING_DIRECTORY);
+                Pantools.logger.warn("No genome database found at {}", WORKING_DIRECTORY);
                 System.out.println("Do you want to reconstruct it from the graph database [y/n]? ");
                 str = s.nextLine().toLowerCase();
                 while (!str.equals("y") && !str.equals("n")) {
@@ -78,14 +77,13 @@ public class Utils {
                 if (str.equals("y")) {
                     rebuild_genome_database();
                 } else {
-                    System.out.println("Exiting the program...");
+                    Pantools.logger.error("Exiting the program...");
                     System.exit(1);
                 }
             } else
                 genomeDb = new SequenceDatabase(WORKING_DIRECTORY + GENOME_DATABASE_PATH);
-            System.out.print("\r                                           \r");
         } else {
-            System.out.println("No pangenome found at " + WORKING_DIRECTORY);
+            Pantools.logger.error("No pangenome found at {}", WORKING_DIRECTORY);
             System.exit(1);
         }
     }
@@ -122,58 +120,35 @@ public class Utils {
     /**
      * Creates and connects to genome, index and graph databases of the pangenome.
      */
-    public static void create_pangenome_database() {
-        File theDir;
-        if (WORKING_DIRECTORY == null) {
-            System.out.println("WORKING_DIRECTORY is empty.");
-            System.exit(1);
-        }
-        theDir = new File(WORKING_DIRECTORY);
-        if (theDir.exists()) {
-            System.out.print("\rDeleting database");
-            delete_directory(WORKING_DIRECTORY);
-            System.out.print("\r                   ");
-        }
-
-        try {
-            theDir.mkdir();
-        } catch (SecurityException se) {
-            System.out.println("Failed to create directory " + WORKING_DIRECTORY);
-            System.exit(1);
-        }
-        GRAPH_DB = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(new File(WORKING_DIRECTORY + GRAPH_DATABASE_PATH))
-                .setConfig(keep_logical_logs, "4 files").newGraphDatabase();
-        registerShutdownHook(GRAPH_DB);
+    public static void createPangenomeDatabase(Path databaseDirectory) throws IOException {
+        GRAPH_DB = createGraphDatabaseService(databaseDirectory);
         genomeDb = new SequenceDatabase(WORKING_DIRECTORY + GENOME_DATABASE_PATH, PATH_TO_THE_GENOMES_FILE);
         indexDb = new IndexDatabase(WORKING_DIRECTORY + INDEX_DATABASE_PATH, PATH_TO_THE_GENOMES_FILE, genomeDb, K_SIZE);
-        new File(WORKING_DIRECTORY + "/databases/genome.db/genomes").mkdir(); // create directory
-        new File(WORKING_DIRECTORY + "log").mkdir(); // create directory
+        Files.createDirectories(databaseDirectory.resolve("databases").resolve("genome.db").resolve("genomes"));
+        Files.createDirectory(databaseDirectory.resolve("log")); //TODO: remove
     }
 
     /**
      * Creates and connects to graph databases of the pangenome.
      */
-    public static void create_panproteome_database() {
-        File theDir;
-        if (WORKING_DIRECTORY == null) {
-            System.out.println("WORKING_DIRECTORY is empty.");
-            System.exit(1);
-        }
-        theDir = new File(WORKING_DIRECTORY);
-        System.out.print("\rDeleting database");
-        delete_directory(WORKING_DIRECTORY + GRAPH_DATABASE_PATH);
-        System.out.print("\r                    \r");
-        try {
-            theDir.mkdir();
-        } catch (SecurityException se) {
-            System.out.println("Failed to create directory " + WORKING_DIRECTORY);
-            System.exit(1);
-        }
-        GRAPH_DB = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(new File(WORKING_DIRECTORY + GRAPH_DATABASE_PATH))
-                .setConfig(keep_logical_logs, "4 files").newGraphDatabase();
-        registerShutdownHook(GRAPH_DB);
-        new File(WORKING_DIRECTORY + "log").mkdir();  // create directory
-        new File(WORKING_DIRECTORY + "proteins").mkdir(); // create directory
+    public static void createPanproteomeDatabase(Path databaseDirectory) throws IOException {
+        GRAPH_DB = createGraphDatabaseService(databaseDirectory);
+        Files.createDirectory(databaseDirectory.resolve("proteins"));
+        Files.createDirectory(databaseDirectory.resolve("log")); //TODO: remove
+    }
+
+    /**
+     * Create neo4j graph database service for the database directory path
+     * @param databaseDirectory path to the pangenome or panproteome database directory
+     * @return neo4j graph database service
+     */
+    private static GraphDatabaseService createGraphDatabaseService(Path databaseDirectory) {
+        final GraphDatabaseService graphDb = new GraphDatabaseFactory()
+                .newEmbeddedDatabaseBuilder(databaseDirectory.resolve("databases").resolve("graph.db").toFile())
+                .setConfig(keep_logical_logs, "4 files")
+                .newGraphDatabase();
+        registerShutdownHook(graphDb);
+        return graphDb;
     }
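+
+    // Note (descriptive): the embedded Neo4j store is created under
+    // <databaseDirectory>/databases/graph.db; the connect_* methods below open the
+    // same location via WORKING_DIRECTORY + GRAPH_DATABASE_PATH, so the two must agree.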
 
     /**
@@ -182,17 +157,16 @@ public class Utils {
     public static void connect_panproteome() {
         if (new File(WORKING_DIRECTORY).exists()) {
             if (! new File(WORKING_DIRECTORY + GRAPH_DATABASE_PATH).exists()) {
-                System.out.println("No graph database found at " + WORKING_DIRECTORY);
+                Pantools.logger.error("No graph database found at {}", WORKING_DIRECTORY);
                 System.exit(1);
             }
-            System.out.print("\rStarting the pangenome database");
+            Pantools.logger.debug("Starting the pangenome database.");
             GRAPH_DB = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(
                             new File(WORKING_DIRECTORY + GRAPH_DATABASE_PATH))
                     .setConfig(keep_logical_logs, "4 files").newGraphDatabase();
             registerShutdownHook(GRAPH_DB);
-            System.out.print("\r                               \r");
         } else {
-            System.out.println("No panproteome found at " + WORKING_DIRECTORY);
+            Pantools.logger.error("No panproteome found at {}", WORKING_DIRECTORY);
             System.exit(1);
         }
     }
@@ -245,7 +219,7 @@ public class Utils {
      */
     public static void create_directory_in_DB(String pathname) {
         if (pathname == null) {
-            System.out.println("Directory is not defined");
+            Pantools.logger.error("Directory is not defined.");
             System.exit(1);
         }
         new File(WORKING_DIRECTORY + pathname).mkdirs(); // create directory
@@ -291,7 +265,7 @@ public class Utils {
         }
         //  traverse the path of the region
         while (len < seq_len) {
-            //System.out.println(node.getId()+" "+len + " " + seq_len);
+            //Pantools.logger.debug("{} {} {}", node.getId(), len, seq_len);
             loc = (begin + len) - K_SIZE + 1;
             rel = get_outgoing_edge(node, origin, loc);
             neighbor = rel.getEndNode();
@@ -322,24 +296,18 @@ public class Utils {
                 MemoryUsage peak = pool.getPeakUsage();
                 memoryUsage += peak.getUsed();
             }
-            System.out.println("Peak memory : " + memoryUsage / 1024 / 1024 + " MB");
+            Pantools.logger.info("Peak memory : {} MB.", memoryUsage / 1024 / 1024);
         } catch (Throwable t) {
-            System.err.println("Exception in agent: " + t);
+            Pantools.logger.error("Exception in agent: {}.", t);
         }
     }
 
     /**
      * Prints the number of threads to screen and returns number as string
-     * @param print print to terminal
-     * @param multiple_allowed - if the function is able to use more threads
      * @return number of threads as string
      */
-    public static String report_number_of_threads(boolean print, boolean multiple_allowed) {
-        if (THREADS == 1 && print && multiple_allowed) {
-            System.out.println("No number of threads are selected via --threads or -tn, using 1");
-        } else if (THREADS > 1 && !multiple_allowed && print) {
-            System.out.println("Multiple threads are selected but this function is only able to use 1. Using 1 thread");
-        }
+    public static String report_number_of_threads() {
+        Pantools.logger.info("Number of threads: {}.", THREADS);
         return String.valueOf(THREADS);
     }
 
@@ -353,7 +321,7 @@ public class Utils {
             last_grouping_successful = (boolean) pangenome_node.getProperty("last_grouping_successful");
         }
         if (!last_grouping_successful) {
-            System.out.println("\nNo (correct) grouping is present. Please run 'group'\n");
+            Pantools.logger.error("No (correct) grouping is present. Please run 'group'.");
             System.exit(1);
         }
     }
@@ -444,7 +412,7 @@ public class Utils {
                 .append(String.join("", Collections.nCopies((int) (Math.log10(total)) - (int) (Math.log10(current)), " ")))
                 .append(String.format(" %d/%d " + addition, current, total));
         if (!every_hundred || (current % 100 == 0 || current == total)) {
-            System.out.print(string);
+            Pantools.logger.info(string.toString());
         }
     }
 
@@ -510,7 +478,7 @@ public class Utils {
                 }
             }
         } catch (IOException ex) {
-            System.err.println(ex.getMessage());
+            Pantools.logger.warn("Error while reading file {}", file_name);
         }
         return false;
     }
@@ -535,7 +503,7 @@ public class Utils {
                 }
             }
         } catch (IOException ex) {
-            System.err.println(ex.getMessage());
+            Pantools.logger.warn("Error while reading file {}", file_name);
         }
         return false;
     }
@@ -555,7 +523,7 @@ public class Utils {
             else
                 return new BufferedReader(new BufferedReader(new FileReader(filename)));
         } catch (IOException ex) {
-            System.out.println(ex.getMessage());
+            Pantools.logger.error("Error while reading file {}", filename);
             return null;
         }
     }
@@ -580,7 +548,7 @@ public class Utils {
             }
             in.close();
         } catch (IOException ex) {
-            System.err.println(ex.getMessage());
+            Pantools.logger.warn("Error while reading file {}", file_name);
         }
         return count;
     }
@@ -599,7 +567,7 @@ public class Utils {
             BufferedReader br2 = new BufferedReader(new InputStreamReader(input));
             return br2;
         } catch (Exception ex) {
-            System.err.println(ex.getMessage() + "\nFailed to open the compresse file!");
+            Pantools.logger.warn("Failed to open the compressed file {}", fileIn);
             return null;
         }
     }
@@ -636,9 +604,7 @@ public class Utils {
      * @return
      */
     public static boolean check_if_file_exists(String path) {
-        if (LOG) {
-            System.out.println("Checking existence of: " + path);
-        }
+        Pantools.logger.debug("Checking existence of: {}", path);
         File file = new File(path);
         boolean exists = file.exists();
         if (exists) { // if the file exists, check if anything is in there
@@ -656,7 +622,7 @@ public class Utils {
      * @param stop
      * @return boolean
      */
-    public static boolean test_if_correct_label(Node target_node, Label required_label1, boolean stop) {
+    public static boolean test_if_correct_label(Node target_node, Label required_label1, boolean stop) throws RuntimeException {
         String required_label = required_label1.toString();
         boolean correct_label = false;
         Iterable<Label> all_labels = target_node.getLabels();
@@ -667,8 +633,8 @@ public class Utils {
             }
         }
         if (!correct_label && stop) {
-            System.out.println(target_node + " is not a '" + required_label + "' node");
-            System.exit(1);
+            Pantools.logger.error("{} is not a '{}' node", target_node, required_label);
+            throw new RuntimeException("Node " + target_node + " is not a '" + required_label + "' node");
         }
         return correct_label;
     }
@@ -708,8 +674,7 @@ public class Utils {
         try (BufferedWriter out = new BufferedWriter(new FileWriter(WORKING_DIRECTORY + output_file))) {
             out.write(output_builder.toString());
         } catch (IOException ioe) {
-            System.out.println(ioe.getMessage());
-            System.out.println("\nUnable to create: " + WORKING_DIRECTORY + output_file + "\n");
+            Pantools.logger.error("Unable to create: {}", WORKING_DIRECTORY + output_file);
             System.exit(1);
         }
     }
@@ -723,8 +688,7 @@ public class Utils {
         try (BufferedWriter out = new BufferedWriter(new FileWriter(WORKING_DIRECTORY + output_file))) {
             out.write(output_str);
         } catch (IOException ioe) {
-            System.out.println(ioe.getMessage());
-            System.out.println("\nUnable to create: " + WORKING_DIRECTORY + output_file + "\n");
+            Pantools.logger.error("Unable to create: {}", WORKING_DIRECTORY + output_file);
             System.exit(1);
         }
     }
@@ -739,8 +703,7 @@ public class Utils {
         try (FileWriter fw = new FileWriter(WORKING_DIRECTORY + output_file, true)) { // true allows to append the original instead of overwriting
             fw.write(output_str);
         } catch(IOException ioe) {
-            System.out.println(ioe.getMessage());
-            System.out.println("\nUnable to create: " + WORKING_DIRECTORY + output_file + "\n");
+            Pantools.logger.error("Unable to create: {}", WORKING_DIRECTORY + output_file);
             System.exit(1);
         }
     }
@@ -760,7 +723,7 @@ public class Utils {
             fw.close();
             in.close();
         } catch (IOException ioe) {
-            System.out.println("\nFailed to read and write : " + input_file + " " + output_file);
+            Pantools.logger.error("Failed to read and write : {} {}", input_file, output_file);
             System.exit(1);
         }
     }
@@ -774,8 +737,7 @@ public class Utils {
         try (BufferedWriter out = new BufferedWriter(new FileWriter(output_file))) {
             out.write(output_builder.toString());
         } catch (IOException ioe) {
-            System.out.println(ioe.getMessage());
-            System.out.println("\nUnable to create: " + output_file + "\n");
+            Pantools.logger.error("Unable to create: {}", output_file);
             System.exit(1);
         }
     }
@@ -789,9 +751,7 @@ public class Utils {
         try (BufferedWriter out = new BufferedWriter(new FileWriter(output_file))) {
             out.write(output_str);
         } catch (IOException ioe) {
-            System.out.println(ioe.getMessage());
-            System.out.println("\nUnable to create: " + output_file + "\n");
-            System.exit(1);
+            throw new RuntimeException("Unable to create " + output_file, ioe);
         }
     }
 
@@ -804,7 +764,7 @@ public class Utils {
         try (BufferedWriter out = new BufferedWriter(new FileWriter(output_file, true))) {
             out.write(output_str);
         } catch (IOException ioe) {
-            System.err.println("\nUnable to create: " + ioe.getMessage());
+            throw new RuntimeException("Unable to create " + output_file, ioe);
         }
     }
 
@@ -818,9 +778,7 @@ public class Utils {
         try (FileWriter fw = new FileWriter(output_file, true)) { //the true will append the new data
             fw.write(output_str);
         } catch(IOException ioe) {
-            System.out.println(ioe.getMessage());
-            System.out.println("\nUnable to create: " + output_file + "\n");
-            System.exit(1);
+            throw new RuntimeException("Unable to create " + output_file, ioe);
         }
     }
 
@@ -877,7 +835,7 @@ public class Utils {
         }
         String log_str = exe_output.toString();
         if (log_str.length() < min_length) {
-            System.out.println(toolname + " is not installed (" + log_str.length() + ")");
+            Pantools.logger.error("{} is not installed ({}).", toolname, log_str.length());
             System.exit(1);
         }
     }
@@ -907,7 +865,7 @@ public class Utils {
         String log_str = exe_output.toString();
         boolean exists = true;
         if (log_str.length() < min_length && stop) {
-            System.out.println("\n" + toolname + " is not installed (" + log_str.length() + ")");
+            Pantools.logger.error("{} is not installed ({}).", toolname, log_str.length());
             System.exit(1);
         } else if (log_str.length() < min_length) {
             exists = false;
@@ -915,20 +873,20 @@ public class Utils {
         return exists;
     }
 
-    /*
-      Verify if --database-path actually contains a graph database
-    */
+    /**
+     * Verify if PATH_TO_THE_PANGENOME_DATABASE actually contains a graph database
+     */
     public static void check_database() {
         if (PATH_TO_THE_PANGENOME_DATABASE == null) {
-            System.out.println("No --database-path or -dp was provided\n");
+            Pantools.logger.error("No database directory was provided.");
             System.exit(1);
         }
         File file = new File(PATH_TO_THE_PANGENOME_DATABASE);
         if (!file.exists()) {
-            System.out.println("\nThe provided database was not found!\n");
+            Pantools.logger.error("The provided database was not found: {}", PATH_TO_THE_PANGENOME_DATABASE);
             System.exit(1);
         } else if (!new File(PATH_TO_THE_PANGENOME_DATABASE + GRAPH_DATABASE_PATH).exists()) {
-            System.out.println("Unable to open the database provided via --database-path/-dp\n");
+            Pantools.logger.error("Unable to open the database provided: {}", PATH_TO_THE_PANGENOME_DATABASE);
             System.exit(1);
         }
     }
@@ -941,12 +899,12 @@ public class Utils {
         phenotype_map = new HashMap<>();
         phenotype_threshold_map = new HashMap<>();
         if (PHENOTYPE == null) {
-            //System.out.println("No phenotype was provided (via --phenotype)");
+            //Pantools.logger.info("No phenotype was provided (via --phenotype)");
             return;
         }
         int pheno_node_count = (int) count_nodes(phenotype_label);
         if (pheno_node_count == 0) {
-            System.out.println("\nNo phenotype nodes are present yet. Please run 'add_phenotypes'\n");
+            Pantools.logger.error("No phenotype nodes are present yet. Please run 'add_phenotypes'.");
             System.exit(1);
         }
         ResourceIterator<Node> pheno_nodes = GRAPH_DB.findNodes(phenotype_label);
@@ -958,7 +916,7 @@ public class Utils {
             if (pheno_node.hasProperty(PHENOTYPE)) {
                 value = pheno_node.getProperty(PHENOTYPE);
             } else {
-                System.out.println("\nThe provided phenotype '" + PHENOTYPE + "' was not found in the phenotype nodes!\n");
+                Pantools.logger.error("The provided phenotype '{}' was not found in the phenotype nodes!", PHENOTYPE);
                 System.exit(1);
                 continue;
             }
@@ -978,7 +936,7 @@ public class Utils {
                 classification.try_incr_AL_hashmap(temp_phenotype_map, value_str, current_genome);
                 geno_pheno_map.put(current_genome, value_str);
             } else {
-                System.out.println("something else.. finish function 486324");
+                Pantools.logger.error("something else.. finish function 486324"); // TODO: what does this even mean?
                 System.exit(1);
             }
         }
@@ -1042,7 +1000,7 @@ public class Utils {
         try {
             FileUtils.deleteDirectory(new File(path));
         } catch (IOException nee) {
-            System.out.println("\nUnable to delete this directory: " + path + "\n");
+            Pantools.logger.error("Unable to delete this directory: {}", path);
         }
     }
 
@@ -1050,7 +1008,7 @@ public class Utils {
         try {
             FileUtils.deleteDirectory(new File(WORKING_DIRECTORY + path));
         } catch (IOException ioe) {
-            System.out.println("Unable to delete this directory:\n" + path);
+            Pantools.logger.error("Unable to delete this directory: {}", path);
         }
     }
 
@@ -1141,17 +1099,16 @@ public class Utils {
         String[] allowed_commands = new String[]{"bpg","build_pangenome","annotate_genomes","build_panproteome","bpp"};
         check_database();
         if (WORKING_DIRECTORY == null) {
-            System.out.println("No database was provided via --database-path or -dp\n");
+            Pantools.logger.error("No database directory was provided.");
             System.exit(1);
         }
 
         if (GRAPH_DB == null || !GRAPH_DB.isAvailable(0)) {
-            System.out.print("\rStarting the pangenome database");
+            Pantools.logger.info("Starting the pangenome database.");
             GRAPH_DB = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(new File(WORKING_DIRECTORY + GRAPH_DATABASE_PATH))
                     .setConfig(keep_logical_logs, "4 files").newGraphDatabase();
             registerShutdownHook(GRAPH_DB);
         }
-        System.out.print("\r                                 \r");
     }
 
     /**
@@ -1170,11 +1127,10 @@ public class Utils {
                 line = in.readLine().trim();
             }
         } catch (IOException ioe) {
-            System.out.println("\nFailed to read: tmp.log\n");
-            System.exit(1);
+            throw new RuntimeException("Failed to read: tmp.log");
         }
         if (line.length() == 0) {
-            System.out.println("The program '" + command_str + "' cannot be found");
+            Pantools.logger.error("The program '{}' cannot be found.", command_str);
             System.exit(1);
         }
         delete_file_in_DB("tmp.log");
@@ -1207,7 +1163,7 @@ public class Utils {
         if (pangenome_node.hasProperty("k_mer_size")) {
             K_SIZE = (int) pangenome_node.getProperty("k_mer_size"); // The "k_mer_size" property is only present in pangenomes
         } else {
-            System.out.println("The function '" + function_name + "' does not work on a panproteome\n");
+            Pantools.logger.error("The function '{}' does not work on a panproteome.", function_name);
             System.exit(1);
         }
         total_genomes = (int) pangenome_node.getProperty("num_genomes");
@@ -1230,11 +1186,11 @@ public class Utils {
      *
      */
     public static void print_mapping_parameters() {
-        System.out.println("MIN_IDENTITY = " + MIN_IDENTITY);
-        System.out.println("NUM_KMER_SAMPLES = " + NUM_KMER_SAMPLES);
-        System.out.println("MAX_NUM_LOCATIONS = " + MAX_NUM_LOCATIONS);
-        System.out.println("ALIGNMENT_BOUND = " + ALIGNMENT_BOUND);
-        System.out.println("CLIPPING_STRINGENCY = " + CLIPPING_STRINGENCY);
+        Pantools.logger.debug("MIN_IDENTITY = {}", MIN_IDENTITY);
+        Pantools.logger.debug("NUM_KMER_SAMPLES = {}", NUM_KMER_SAMPLES);
+        Pantools.logger.debug("MAX_NUM_LOCATIONS = {}", MAX_NUM_LOCATIONS);
+        Pantools.logger.debug("ALIGNMENT_BOUND = {}", ALIGNMENT_BOUND);
+        Pantools.logger.debug("CLIPPING_STRINGENCY = {}", CLIPPING_STRINGENCY);
     }
 
     /**
@@ -1243,14 +1199,14 @@ public class Utils {
      * @return a list with homology groups
      * @throws IOException if file cannot be read
      */
-    public static List<Integer> parseHmFile(Path hmFile) throws IOException {
-        ArrayList<Integer> hmGroups = new ArrayList<>();
+    public static List<Long> parseHmFile(Path hmFile) throws IOException {
+        ArrayList<Long> hmGroups = new ArrayList<>();
         try (BufferedReader br = new BufferedReader(new FileReader(hmFile.toFile()))) {
             String line;
             while ((line = br.readLine()) != null) {
                 String[] hmGroup = line.split(",");
                 for (String s : hmGroup) {
-                    hmGroups.add(Integer.parseInt(s));
+                    hmGroups.add(Long.parseLong(s));
                 }
             }
         } catch (IOException e) {
@@ -1260,4 +1216,40 @@ public class Utils {
         }
         return hmGroups;
     }
-}
\ No newline at end of file
+
+    /**
+     * Retrieves homology group nodes. If hmGroups is null, all homology groups are
+     * retrieved; otherwise only the groups whose ids are listed in hmGroups.
+     * @param hmGroups a list of homology group ids (if null, all homology groups are retrieved)
+     * @param minimumMembers the minimum number of members a homology group must have
+     *                       (only applied when all groups are retrieved)
+     * @return ArrayList of homology group nodes
+     * NB: has to be called within a transaction
+     */
+    public static ArrayList<Node> findHmNodes(List<Long> hmGroups, int minimumMembers) throws RuntimeException {
+        final ArrayList<Node> hmList = new ArrayList<>();
+        if (hmGroups == null) {
+            Pantools.logger.info("No homology groups were provided, using all.");
+            try (ResourceIterator<Node> hmNodes = GRAPH_DB.findNodes(homology_group_label)) {
+                while (hmNodes.hasNext()) {
+                    Node hmNode = hmNodes.next();
+                    int numMembers = (int) hmNode.getProperty("num_members");
+                    if (numMembers > minimumMembers) {
+                        hmList.add(hmNode);
+                    }
+                }
+            } catch (Exception e) {
+                Pantools.logger.error("Error while retrieving homology groups.");
+                throw new RuntimeException(e);
+            }
+        } else {
+            Pantools.logger.info("Using homology groups provided by user.");
+            for (Long hmGroup : hmGroups) {
+                Node hmNode = GRAPH_DB.getNodeById(hmGroup);
+                test_if_correct_label(hmNode, homology_group_label, true);
+                hmList.add(hmNode);
+            }
+        }
+
+        return hmList;
+    }
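+
+    // Usage sketch (hypothetical caller; requires an open transaction on GRAPH_DB):
+    //   List<Long> ids = parseHmFile(Paths.get("groups.csv")); // lines of comma-separated group ids
+    //   ArrayList<Node> selected = findHmNodes(ids, 0);        // only the listed groups
+    //   ArrayList<Node> all = findHmNodes(null, 1);            // every group with more than one member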
+}
diff --git a/src/main/resources/Defaults.properties b/src/main/resources/Defaults.properties
index 761cc2a70dbc1c8086cb79b438b1094aed1c810f..57343d848773ffc31699c302350fce97ddd71950 100644
--- a/src/main/resources/Defaults.properties
+++ b/src/main/resources/Defaults.properties
@@ -12,6 +12,12 @@ pantools.add_phenotypes.bins = 3
 pantools.ani.mode = MASH
 # BuscoProtein
 pantools.busco_protein.version = BUSCO5
+# BuildPangenome
+pantools.build_pangenome.num-buckets = 200
+pantools.build_pangenome.num-db-writer-threads = 2
+pantools.build_pangenome.transaction-size = 10000
+pantools.build_pangenome.cache-size = 10000000
+pantools.build_pangenome.keep-intermediate-files = false
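+# Sketch of overriding these per run (assuming the CLI exposes options mirroring
+# the keys above, as the option descriptions in MessageBundle.properties suggest):
+#   java -jar pantools.jar build_pangenome --num-buckets 400 --cache-size 20000000 <database> <genomes.txt>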
 # CoreSnpTree
 pantools.core_phylogeny.clustering-mode = ML
 # create_tree_template
diff --git a/src/main/resources/ErrorMessages.properties b/src/main/resources/ErrorMessages.properties
index c39682718a2a5e28e0ba8d672daa9fb6c712619c..cc625add3bfeb58ddec7a9db60af51f2dd2cb7a4 100644
--- a/src/main/resources/ErrorMessages.properties
+++ b/src/main/resources/ErrorMessages.properties
@@ -1,5 +1,4 @@
 directory.busco     = BUSCO directory not found
-directory.database  = Pangenome database directory not found
 
 exclude.core = Nodes of type 'nucleotide', 'sequence', 'pangenome', 'genome', and 'degenerate' should not be removed \
   from the graph database.
@@ -35,6 +34,10 @@ max.fdr                     = --fdr must be less than or equal to {value}
 max.gap-open                = --gap-open must be less than or equal to {value}
 max.gap-ext                 = --gap-ext must be less than or equal to {value}
 max.ksize                   = K must be less than or equal to {value}
+min.num-buckets             = number of buckets for sorting must be at least {value}
+min.transaction-size        = transaction size must be at least {value}
+min.num-db-writer-threads   = number of database writer threads must be at least {value}
+min.cache-size              = cache size must be at least {value}
 max.loops                   = --loops must be less than or equal to {value}
 max.max-alignment-length    = --max-alignment-length must be less than or equal to {value}
 max.max-fragment-length     = --max-fragment-length must be less than or equal to {value}
diff --git a/src/main/resources/ManualSections.properties b/src/main/resources/ManualSections.properties
index 989f757d7009fc9840041cf781cbb0dfa28236b3..a7e2ebb594862eb0b34ee7637982a24eed415e1b 100644
--- a/src/main/resources/ManualSections.properties
+++ b/src/main/resources/ManualSections.properties
@@ -8,7 +8,7 @@ add_antismash = user_guide/construct
 add_phenotypes = user_guide/construct
 busco_protein = user_guide/construct
 remove_nodes = user_guide/construct
-remove_phenotype = user_guide/construct
+remove_phenotypes = user_guide/construct
 remove_annotations = user_guide/construct
 remove_features = user_guide/construct
 remove_functions = user_guide/construct
diff --git a/src/main/resources/MessageBundle.properties b/src/main/resources/MessageBundle.properties
index 54845de012df3cdc6da2ad130ced4e2d978592c6..18dde0a28545cb54934f71a7e6e0cfeaccf011a5 100644
--- a/src/main/resources/MessageBundle.properties
+++ b/src/main/resources/MessageBundle.properties
@@ -28,7 +28,7 @@ pantools.add_antismash.section = add annotation features to the genome
 pantools.add_phenotypes.section = add annotation features to the genome
 pantools.busco_protein.section = add annotation features to the genome
 pantools.remove_nodes.section = remove data from the pangenome or panproteome
-pantools.remove_phenotype.section = remove data from the pangenome or panproteome
+pantools.remove_phenotypes.section = remove data from the pangenome or panproteome
 pantools.remove_annotations.section = remove data from the pangenome or panproteome
 pantools.remove_functions.section = remove data from the pangenome or panproteome
 pantools.move_grouping.section = remove data from the pangenome or panproteome
@@ -67,9 +67,11 @@ pantools.retrieve_regions.section = retrieve regions or features
 pantools.retrieve_features.section = retrieve regions or features
 pantools.msa.section = sequence alignments
 pantools.map.section = read mapping
+pantools.export_pangenome.section = export pangenome
 
 # Description
-pantools.build_pangenome.usage.description.0 = Build a pangenome from a set of genomes.
+pantools.build_pangenome.usage.description.0 = Build a pangenome from a set of genomes. Please see the manual with \
+  'build_pangenome --manual' for a description of the options.
 pantools.build_pangenome.usage.description.1 = Required software: KMC 2.3 or 3.0.
 pantools.add_genomes.usage.description.0 = Add additional genomes to an existing pangenome.
 pantools.add_genomes.usage.description.1 = Required software: KMC 2.3 or 3.0
@@ -82,7 +84,7 @@ pantools.add_phenotypes.usage.description = Add phenotype data to the pangenome.
 pantools.busco_protein.usage.description.0 = Identify BUSCO genes in the pangenome.
 pantools.busco_protein.usage.description.1 = Required software: BUSCO v3, v4 or v5 (not included in conda yaml file for macOS).
 pantools.remove_nodes.usage.description = Remove a selection of nodes and their relationships from the pangenome.
-pantools.remove_phenotype.usage.description = Delete phenotype nodes or remove specific phenotype \
+pantools.remove_phenotypes.usage.description = Delete phenotype nodes or remove specific phenotype \
   information from the nodes.
 pantools.remove_annotations.usage.description = Remove all the genomic features that belong to annotations.
 pantools.remove_functions.usage.description = Remove functional annotations from the pangenome.
@@ -144,6 +146,8 @@ pantools.retrieve_genomes.usage.description = Retrieve complete genomes from the
 pantools.retrieve_features.usage.description = Retrieve the sequence of annotated features from the pangenome.
 pantools.map.usage.description = Map single or paired-end short reads to one or multiple genomes in the pangenome. \
   One SAM or BAM file is generated for each genome included in the analysis.
+pantools.export_pangenome.usage.description = Export a pangenome built with build_pangenome into node properties, \
+  relationship properties and node sequence anchors files.
 
 # Options
 annotations-file = A text file with the identifiers of annotations to be included.
@@ -203,6 +207,14 @@ pantools.busco_protein.longest = Only search against the longest protein-coding
 pantools.busco_protein.skip-busco = A list of questionable BUSCOs. The completeness score is recalculated by skipping these \
   genes.
 pantools.busco_protein.busco-version = The BUSCO version (default: BUSCO5).
+# BuildPangenome
+pantools.build_pangenome.scratch-directory = Temporary directory for storing localization update files.
+pantools.build_pangenome.num-buckets = Number of buckets for sorting (default: ${DEFAULT-VALUE}).
+pantools.build_pangenome.transaction-size = Number of localization updates to pack into a single Neo4j transaction (default: ${DEFAULT-VALUE}).
+pantools.build_pangenome.num-db-writer-threads = Number of threads to use for writing to Neo4j (default: ${DEFAULT-VALUE}).
+pantools.build_pangenome.cache-size = Maximum number of items in the node properties cache (default: ${DEFAULT-VALUE}).
+pantools.build_pangenome.keep-intermediate-files = Do not delete intermediate localization files after the command finishes (default: ${DEFAULT-VALUE}).
+
 # ChangeGrouping
 pantools.change_grouping.grouping-version = The version of homology grouping to become active.
 # ConsensusTree
@@ -365,8 +377,8 @@ pantools.remove_grouping.grouping-version = Specific grouping version to be remo
 pantools.remove_nodes.include = Only remove nodes of the selected genomes.
 pantools.remove_nodes.exclude = Do not remove nodes of the selected genomes.
 # RemovePhenotype
-pantools.remove_phenotype.include = Only remove nodes of the selected genomes.
-pantools.remove_phenotype.exclude = Do not remove nodes of the selected genomes.
+pantools.remove_phenotypes.include = Only remove nodes of the selected genomes.
+pantools.remove_phenotypes.exclude = Do not remove nodes of the selected genomes.
 # RenameMatrix
 pantools.rename_matrix.matrix-file = A matrix file with numerical values.
 pantools.rename_matrix.no-numbers = In- or exclude genome numbers from the headers. Numbers are included by default.
diff --git a/src/main/resources/log4j2.properties b/src/main/resources/log4j2.properties
index 331970bff86baef5ec33767235c18df55420ad49..1e5df70a036b65da6d36286019f252c7259d347d 100644
--- a/src/main/resources/log4j2.properties
+++ b/src/main/resources/log4j2.properties
@@ -1,42 +1,47 @@
-# System properties
-property.directory = ${sys:log4j.saveDirectory:-}
-property.level = ${sys:log4j2.level}
-
-# Patterns
-property.patterns.file = %d{yyyy-MM-dd} %d{HH:mm:ss} %m%n
-property.patterns.console = %d{HH:mm:ss} [%-5level] %m%n
+# GLOBALS
+property.LEVEL = ${sys:log4j2.level:-info}
+property.ROOT_LEVEL = ${sys:log4j2.rootLevel:-all}
+property.LOG_DIR = ${sys:log4j2.saveDirectory:-.}
+property.LOG_FILE = ${sys:log4j2.logFile:-log_file}
 
 # APPENDERS
-appenders = console, usage
+appenders = console, universal
 
 # console output
 appender.console.type = Console
 appender.console.name = STDOUT
+appender.console.filter.threshold.type = ThresholdFilter
+appender.console.filter.threshold.level = ${LEVEL}
 appender.console.layout.type = PatternLayout
-appender.console.layout.pattern = ${patterns.console}
-
-# usage log
-appender.usage.type = File
-appender.usage.name = UsageLog
-appender.usage.createOnDemand = true
-appender.usage.fileName = ${directory}/usage.log
-appender.usage.layout.type = PatternLayout
-appender.usage.layout.pattern = ${patterns.file}
+appender.console.layout.pattern = %d{HH:mm:ss} [%-5level] %m%n
+
+# universal log
+appender.universal.type = RollingFile
+appender.universal.name = LogFile
+appender.universal.filter.threshold.type = ThresholdFilter
+appender.universal.filter.threshold.level = debug
+appender.universal.createOnDemand = true
+appender.universal.filePattern = ${LOG_DIR}/${LOG_FILE}.%d{yyyy-MM-dd'T'HH-mm-ss}.log
+appender.universal.layout.type = PatternLayout
+appender.universal.layout.pattern = %d{yyyy-MM-dd} %d{HH:mm:ss} [%-5level] %m%n
+appender.universal.policies.type = Policies
+appender.universal.policies.size.type = SizeBasedTriggeringPolicy
+appender.universal.policies.size.size = 100MB
+appender.universal.strategy.type = DirectWriteRolloverStrategy
 
 # LOGGERS
-loggers = usage, hibernate
+loggers = hibernate, jboss
 
 # root logger
-rootLogger.level = ${level}
-rootLogger.appenderRefs = stdout
-rootLogger.appenderRef.stdout.ref = STDOUT
-
-# usage logger
-logger.usage.name = nl.wur.bif.pantools.pantools.Pantools
-logger.usage.level = ${level}
-logger.usage.appenderRefs = file
-logger.usage.appenderRef.file.ref = UsageLog
+rootLogger.level = ${ROOT_LEVEL}
+rootLogger.appenderRefs = console, universal
+rootLogger.appenderRef.console.ref = STDOUT
+rootLogger.appenderRef.universal.ref = LogFile
 
 # hibernate logger
 logger.hibernate.name = org.hibernate
-logger.hibernate.level = off
\ No newline at end of file
+logger.hibernate.level = off
+
+# jboss logger
+logger.jboss.name = org.jboss
+logger.jboss.level = off
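+
+# Example (sketch): the GLOBALS above resolve from JVM system properties, so they
+# can be overridden per run, e.g.
+#   java -Dlog4j2.level=debug -Dlog4j2.saveDirectory=logs -Dlog4j2.logFile=run ...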
diff --git a/src/test/java/nl/wur/bif/pantools/cli/validation/ConstraintsTest.java b/src/test/java/nl/wur/bif/pantools/cli/validation/ConstraintsTest.java
index ca68516b21e13d3ac8361fed006e81441e91733f..75d4ad11ee55fa9999718bae9b5de4b0a12aa860 100644
--- a/src/test/java/nl/wur/bif/pantools/cli/validation/ConstraintsTest.java
+++ b/src/test/java/nl/wur/bif/pantools/cli/validation/ConstraintsTest.java
@@ -23,6 +23,11 @@ import static org.junit.jupiter.api.Assertions.*;
  */
 class ConstraintsTest {
 
+    @BeforeAll
+    static void disableLogging() {
+        System.setProperty("log4j2.rootLevel", "off");
+    }
+
     @Nested
     @DisplayName("Test @MinOrZero constraint")
     class MinOrZeroTests implements Bean {
@@ -92,25 +97,6 @@ class ConstraintsTest {
         }
     }
 
-    @Nested
-    @DisplayName("Test @MatchCores constraint")
-    class MatchCoresTests implements Bean {
-        @MatchCores
-        private int cores;
-
-        @Test
-        void matchingCoresTest() {
-            cores = Runtime.getRuntime().availableProcessors();
-            assertTrue(this.isValid());
-        }
-
-        @Test
-        void notMatchingCoresTest() {
-            cores = Runtime.getRuntime().availableProcessors() + 1;
-            assertFalse(this.isValid());
-        }
-    }
-
     @Nested
     @DisplayName("Tests for files and directory constraints")
     @TestInstance(TestInstance.Lifecycle.PER_CLASS)
@@ -123,33 +109,6 @@ class ConstraintsTest {
             addShutDownHook(tempDirectory);
         }
 
-        @Nested
-        @DisplayName("Test @GraphDatabase constraint")
-        class GraphDatabaseTests implements Bean {
-            @GraphDatabase
-            private Path databaseDirectory;
-
-            @Test
-            void emptyGraphDatabaseTest() throws IOException {
-                databaseDirectory = Files.createDirectory(tempDirectory.resolve("DB"));
-                Files.createDirectories(tempDirectory.resolve("databases/graph.db"));
-                assertFalse(this.isValid());
-            }
-
-            @Test
-            void neo4jGraphDatabaseTest() throws IOException {
-                databaseDirectory = Files.createDirectory(tempDirectory.resolve("DB"));
-                Files.createDirectories(databaseDirectory.resolve("databases/graph.db"));
-                Files.createFile(databaseDirectory.resolve("databases/graph.db/file"));
-                assertTrue(this.isValid());
-            }
-
-            @AfterEach
-            void cleanDatabase() throws IOException {
-                cleanDirectory(databaseDirectory.toFile());
-            }
-        }
-
         @Nested
         @DisplayName("Test @InputDirectory constraint")
         class InputDirectoryTests implements Bean {
diff --git a/tests/.gitignore b/tests/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..5436ee2d2e5575706ea0acc44737d2d87a1b89fb
--- /dev/null
+++ b/tests/.gitignore
@@ -0,0 +1,5 @@
+input/
+output/
+jars/
+.snakemake/
+
diff --git a/tests/Snakefile b/tests/Snakefile
new file mode 100644
index 0000000000000000000000000000000000000000..e5a1b08f5c67389d0418111db438960141f945fc
--- /dev/null
+++ b/tests/Snakefile
@@ -0,0 +1,239 @@
+"""
+This Snakefile requires Java, Maven, curl and samtools to be installed.
+
+It will check:
+
+1. The identity of the pangenome between the local version and a reference
+   version, as defined in the `shared.yaml` config file;
+2. Idem for the identity of the SAM files generated by both versions.
+
+To execute, specify a configuration file with the data set, e.g.:
+
+snakemake --cores 1 --configfile yeast-1.yaml
+
+The pipeline will package the local version into a .jar, as well as a reference
+version (a git revision) specified in shared.yaml. It will then download and
+untar the dataset referenced by the configuration file passed in by the user
+with --configfile (e.g. yeast-1.yaml, yeast-4.yaml).
+
+A pangenome will be built with `build_pangenome` for the local and reference
+versions each. Pangenomes will be exported with `export_pangenome`, the
+resulting .csv files sorted, and compared with the cmp command line tool for
+differences. If a difference is found, the pipeline fails.
+
+Reads from the data set will be aligned to the local and reference pangenomes,
+resulting in a .sam file per genome per database (local or reference). Each
+.sam file will have its @PG header line stripped to remove samtools
+metadata that would otherwise fail the comparison immediately. Then, .sam files
+for each genome are compared between the local and reference versions with the
+cmp command line tool.
+"""
+configfile: "shared.yaml"
+
+maven = f"{config['tools']['maven']['command']} {' '.join(config['tools']['maven']['arguments'])}"
+dataset_name = config["dataset"]["name"]
+java = f"java {' '.join(config['tools']['pantools']['java_arguments'])}"
+
+rule all:
+    input:
+        # Pangenome export comparison checks. A .cmp file will be generated for each
+        # type of exported CSV file generated by 'pantools export_pangenome'.
+        expand(
+            "output/validation/exports/{dataset_name}/local-vs-{version}/{comparison_type}.cmp",
+            dataset_name=dataset_name,
+            version=config["tools"]["pantools"]["reference_version"],
+            comparison_type=["node-properties", "relationship-properties", "sequence-node-anchors"]
+        ),
+        # SAM file comparison checks. A .cmp file will be generated for each pair of SAM
+        # files, a pair being the alignments against genome x for the local version and the
+        # reference version.
+        expand(
+            "output/validation/alignments/{dataset_name}/local-vs-{version}/pantools_{genome}.cmp",
+            dataset_name=dataset_name,
+            version=config["tools"]["pantools"]["reference_version"],
+            genome=range(1, len(config["dataset"]["genomes"]) + 1)
+        )
+
+rule package_local_pantools:
+    """Package the local (git) version of PanTools into a .jar."""
+    output:
+        "jars/pantools/target/pantools-local.jar"
+    shell:
+        """
+        {maven} -f ../pom.xml clean package
+        mv ../target/pantools-*.jar {output}
+        """
+
+rule package_remote_pantools_version:
+    """
+    Package any PanTools version into a .jar by cloning it from git.
+    NOTE: to reduce clone time and disk space, the repo is cloned only to a limited
+    depth. The reference version should be updated periodically.
+    """
+    output:
+        "jars/pantools/target/pantools-{version}.jar"
+    params:
+        repository_url=config["tools"]["pantools"]["repository_url"],
+        clone_depth=150
+    shell:
+        """
+        # TODO: clean up
+        TEMPORARY_DIRECTORY=$(mktemp -d)
+        git clone --depth {params.clone_depth} --branch develop {params.repository_url} ${{TEMPORARY_DIRECTORY}}
+        cd ${{TEMPORARY_DIRECTORY}}
+        git checkout {wildcards.version}
+        cd -
+        {maven} -f ${{TEMPORARY_DIRECTORY}}/pom.xml clean package
+        mv ${{TEMPORARY_DIRECTORY}}/target/pantools-*.jar {output}
+        rm -rf ${{TEMPORARY_DIRECTORY}}
+        """
+
+rule build_pangenome:
+    """Run build_pangenome with a versioned .jar on a genomes.txt."""
+    input:
+        jar="jars/pantools/target/pantools-{version}.jar",
+        genomes="input/data/{dataset_name}/genomes/genomes.txt"
+    output:
+        output_directory=directory("output/databases/{dataset_name}/{version}/"),
+        done_marker=touch("output/databases/{dataset_name}/{version}.done")
+    shell:
+        """
+        {java} \
+            -jar {input.jar} \
+            build_pangenome \
+            {output.output_directory} \
+            {input.genomes}
+        """
+
+rule export_pangenome:
+    """Export pangenome to multiple, each containing different properties."""
+    input:
+        database_done_marker="output/databases/{dataset_name}/{version}.done",
+        jar="jars/pantools/target/pantools-local.jar"
+    output:
+        node_properties="output/exports/{dataset_name}/{version}/node-properties.csv",
+        relationship_properties="output/exports/{dataset_name}/{version}/relationship-properties.csv",
+        sequence_node_anchors="output/exports/{dataset_name}/{version}/sequence-node-anchors.csv"
+    shell:
+        """
+        {java} \
+            -jar {input.jar} \
+            export_pangenome \
+            $(dirname {input.database_done_marker})/{wildcards.version} \
+            --node-properties-file {output.node_properties} \
+            --relationship-properties-file {output.relationship_properties} \
+            --sequence-node-anchors-file {output.sequence_node_anchors}
+        """
+
+rule download_dataset:
+    """Download the data set with the provided name."""
+    # TODO: URL should be changed once data is uploaded
+    # TODO: md5sum check?
+    # TODO: split genomes.txt generation from downloading data set
+    # TODO: standardize data set formats, file names, etc.
+    output:
+        genome_list="input/data/{dataset_name}/genomes/genomes.txt",
+        fastq1="input/data/{dataset_name}/reads/1.fastq",
+        fastq2="input/data/{dataset_name}/reads/2.fastq"
+    params:
+        url=config["dataset"]["url"]
+    shell:
+        """
+        mkdir -p "input/data/{wildcards.dataset_name}/"
+        
+        # TODO: for some reason a streaming untar does not seem to work (anymore)
+        curl -o dataset.tar --silent {params.url}
+        tar xf dataset.tar -C input/data/{wildcards.dataset_name}/
+        rm dataset.tar
+        
+        find input/data/{wildcards.dataset_name}/genomes/ -iname '*.fasta' | sort > {output.genome_list}
+        
+        mv input/data/{wildcards.dataset_name}/reads/*_1.fastq {output.fastq1}
+        mv input/data/{wildcards.dataset_name}/reads/*_2.fastq {output.fastq2}
+        """
+
+rule map_reads:
+    """Map reads against a a database, outputs a SAM file per genome."""
+    input:
+        fastq1="input/data/{dataset_name}/reads/1.fastq",
+        fastq2="input/data/{dataset_name}/reads/2.fastq",
+        genome_list="input/data/{dataset_name}/genomes/genomes.txt",
+        database="output/databases/{dataset_name}/{version}/",
+        jar="jars/pantools/target/pantools-{version}.jar"
+    output:
+        sam_files=expand(
+            "output/alignments/{{dataset_name}}/{{version}}/sams/pantools_{genome}.sam",
+            genome=range(1, len(config["dataset"]["genomes"]) + 1)
+        ),
+        genome_numbers_file="output/alignments/{dataset_name}/{version}/genome-numbers.txt"
+    params:
+        num_genomes=len(config["dataset"]["genomes"])
+    shell:
+        """
+        mkdir -p $(dirname {output.genome_numbers_file})
+        
+        seq 1 {params.num_genomes} > {output.genome_numbers_file}
+        
+        {java} \
+            -jar {input.jar} \
+            map \
+            --threads 1 \
+            {input.database} \
+            {output.genome_numbers_file} \
+            {input.fastq1} \
+            {input.fastq2} \
+            --gap-open -20 \
+            --gap-extension -3 \
+            --out-format SAM \
+            --output $(dirname {output.sam_files[0]}) \
+            # --reference 1-$(wc -l <{input.genome_list} | sed 's/ //g')
+        """
+
+rule strip_sam_pg_header_line:
+    """
+    Strip the @PG header line of a SAM file. Necessary to remove samtools metadata,
+    which would otherwise always fail the comparison.
+    """
+    input: "output/alignments/{dataset_name}/{version}/sams/{filename}.sam"
+    output: "output/alignments/{dataset_name}/{version}/sams/{filename}.no-pg-header-line.sam"
+    shell: "grep -v '^@PG' {input} > {output}"
+
+rule compare_sams:
+    """
+    Compare SAM files with cmp. Will exit with a non-zero exit code if files are different.
+    It would be nice to use Picard here, but there's a bug that makes it unusable. See:
+    https://github.com/broadinstitute/picard/issues/284
+    https://github.com/broadinstitute/picard/issues/160
+    """
+    input:
+        sam1="output/alignments/{dataset_name}/{version1}/sams/{filename}.no-pg-header-line.sam",
+        sam2="output/alignments/{dataset_name}/{version2}/sams/{filename}.no-pg-header-line.sam"
+    output:
+        "output/validation/alignments/{dataset_name}/{version1}-vs-{version2}/{filename}.cmp"
+    shell:
+        """
+        mkdir -p $(dirname {output})
+        cmp {input.sam1} {input.sam2} | tee {output}
+        """
+
+rule compare_exported_csvs:
+    """Compare sorted CSV files from export_pangenome with cmp."""
+    input:
+        csv1="output/exports/{dataset_name}/{version1}/{comparison_type}.sorted.csv",
+        csv2="output/exports/{dataset_name}/{version2}/{comparison_type}.sorted.csv"
+    output:
+        "output/validation/exports/{dataset_name}/{version1}-vs-{version2}/{comparison_type}.cmp"
+    shell:
+        """
+        mkdir -p $(dirname {output})
+        cmp {input.csv1} {input.csv2} | tee {output}
+        """
+
+rule sort_csvs:
+    """Sort CSVs exported by export_pangenome."""
+    input:
+        "output/exports/{dataset_name}/{version}/{filename}.csv"
+    output:
+        "output/exports/{dataset_name}/{version}/{filename}.sorted.csv"
+    shell:
+        """sort {input} > {output}"""
diff --git a/tests/shared.yaml b/tests/shared.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fe62639baa6e31abe0f4b13d19cd24465b95bc5e
--- /dev/null
+++ b/tests/shared.yaml
@@ -0,0 +1,12 @@
+# Configuration shared by all runs and data sets
+tools:
+  pantools:
+    repository_url: https://git.wur.nl/bioinformatics/pantools.git
+    reference_version: 8da5bd05
+    java_arguments:
+      - -Xmx8g
+  maven:
+    command: mvn
+    arguments:
+      - -DskipTests=true
+
diff --git a/tests/yeast-1.yaml b/tests/yeast-1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0eebe780084daa5fc2c324df35a7a85cef609534
--- /dev/null
+++ b/tests/yeast-1.yaml
@@ -0,0 +1,8 @@
+dataset:
+  name: "yeast-1"
+  url: "https://www.bioinformatics.nl/pangenomics/data/ci/yeast-1.tar.gz"
+  genomes:
+    - GCF_000146045.2_R64_genomic.fasta
+  reads:
+    fastq1: 1_MSv3_1.fastq
+    fastq2: 1_MSv3_2.fastq
diff --git a/tests/yeast-4.yaml b/tests/yeast-4.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..815abcef0a45e14b20c26728fc2f25770bb66c11
--- /dev/null
+++ b/tests/yeast-4.yaml
@@ -0,0 +1,11 @@
+dataset:
+  name: "yeast-4"
+  url: "https://www.bioinformatics.nl/pangenomics/data/ci/yeast-4.tar.gz"
+  genomes:
+    - GCA_000167035.1_ASM16703v1_genomic.fasta
+    - GCA_000256765.1_Saccharomyces_kudriavzevii_strain_FM1066_v1.0_genomic.fasta
+    - GCF_000146045.2_R64_genomic.fasta
+    - GCF_001298625.1_SEUB3.0_genomic.fasta
+  reads:
+    fastq1: 1_MSv3_1.fastq
+    fastq2: 1_MSv3_2.fastq
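+# A new data set can be added with a similar file next to this one (assuming a
+# matching tarball is available at the url), then run with e.g.:
+#   snakemake --cores 1 --configfile <new-dataset>.yaml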