diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 68e69b32b32a8f17fb1c6cea0d85e4867180468c..35b3f778ff7a9ea6571c6197b0196b347eb9d55b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -2,18 +2,14 @@ image: harbor.containers.wurnet.nl/proxy-cache/library/maven:3.8.4-jdk-8-slim -stages: - - test:unit - - test:documentation - - test:end-to-end +variables: + MAMBA_ROOT_PREFIX: "/opt/micromamba" -unit-tests: - stage: test:unit - script: - - mvn test +stages: + - test sphinx-linter: - stage: test:documentation + stage: test script: - | # Install system dependencies @@ -30,7 +26,7 @@ sphinx-linter: sphinx-lint -e=all --max-line-length=80 sphinx-documentation: - stage: test:documentation + stage: test script: - | # Install system dependencies @@ -46,8 +42,13 @@ sphinx-documentation: sphinx-build -W docs/source output -end-to-end-tests:yeast-1: - stage: test:end-to-end +unit-tests: + stage: test + script: + - mvn test + +end-to-end-yeast-pangenome: + stage: test variables: KUBERNETES_MEMORY_REQUEST: 8Gi KUBERNETES_MEMORY_LIMIT: 16Gi @@ -56,32 +57,35 @@ end-to-end-tests:yeast-1: # Install system dependencies apt-get update - apt-get install --yes --no-install-recommends git - - # Install Conda + apt-get install --yes --no-install-recommends git bzip2 openssh-client sshpass - MINICONDA_INSTALLER=$(mktemp) - curl -o "${MINICONDA_INSTALLER}" --silent "https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh" - bash "${MINICONDA_INSTALLER}" -b - rm "${MINICONDA_INSTALLER}" - export PATH="${PATH}:/root/miniconda3/bin/" + # Install micromamba - # Install Mamba for faster installation of remaining packages + curl -Ls https://micromamba.snakepit.net/api/micromamba/linux-64/1.4.2 | tar xvj bin/micromamba + chmod +x ./bin/micromamba + ./bin/micromamba shell --yes init ${MAMBA_ROOT_PREFIX} + source /root/.bashrc - conda install --yes --channel conda-forge "mamba=0.15.3" + # Install KMC, samtools and Snakemake with micromamba - # Install KMC, samtools and Snakemake with Mamba - - mamba install --yes --channel conda-forge --channel bioconda \ + micromamba create --name snakemake --yes --strict-channel-priority \ + --channel conda-forge --channel bioconda \ "kmc=3.0.1" \ "samtools=1.15" \ "snakemake=7.19.1" - # Run end-to-end tests with Snakemake + # Obtain data from SFTP server cd tests/ - snakemake \ + sshpass -e sftp -o StrictHostKeyChecking=no ${SFTP_USERNAME}@${SFTP_HOST} << EOF + get /yeast_tiny_pangenome.tar.gz + EOF + tar xzvf yeast_tiny_pangenome.tar.gz + + # Run end-to-end tests with Snakemake + + micromamba run --name snakemake snakemake \ --cores 1 \ --printshellcmds \ --debug \ - --configfile yeast-1.yaml + --configfile yeast.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index f52075d2a66dc19fa6cc48ed4949073b77ca0c86..f89d8499603569999e925011eb6f6435a198917a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ All notable changes to Pantools will be documented in this file. +## [4.2.2] - 23-06-2023 + +### Changed +- `pantools group_info` defaults to all homology groups (!166). +- `pantools map` no longer accepts a genome numbers file and uses `--include`/`--exclude` instead (!170). + +### Fixed +- `pantools consensus_tree` now properly checks if groups were excluded based on trimming (!168). + ## [4.2.1] - 05-06-2023 ### Added diff --git a/docs/source/conf.py b/docs/source/conf.py index 63818a97ddf47d69fb3791c56e1c9b9006562e1c..5349b27201a4e6a2f8271a9212d1901a4c226a07 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -10,8 +10,8 @@ project = 'PanTools' copyright = '2016, the PanTools team' author = 'Sandra Smit' -release = '4.2.1' -version = '4.2.1' +release = '4.2.2' +version = '4.2.2' # -- General configuration diff --git a/docs/source/user_guide/explore.rst b/docs/source/user_guide/explore.rst index b7873fbf977f2ab4145bf9c7417b5adc32cefb8b..3df1bf337e6d6e14f8827af6dbc606b617f48d08 100644 --- a/docs/source/user_guide/explore.rst +++ b/docs/source/user_guide/explore.rst @@ -364,9 +364,6 @@ Report all available information of one or multiple homology groups. * - <databaseDirectory> - Path to the database root directory. - * - <homologyFile> - - A text file with homology group node identifiers, seperated by a - comma. **Options** .. list-table:: @@ -376,6 +373,14 @@ Report all available information of one or multiple homology groups. - Only include a selection of genomes. * - ``--exclude``/``-e`` - Exclude a selection of genomes. + * - ``--homology-file``/``-H`` + - A text file with homology group node identifiers, separated by a comma. + Default is all homology groups. (Mutually exclusive with + ``--homology-groups``.) + * - ``--homology-groups``/``-G`` + - A comma separated list of homology group node identifiers. + Default is all homology groups. (Mutually exclusive with + ``--homology-file``.) * - ``--functions`` - Name of function identifiers from GO, PFAM, InterPro or TIGRAM. To find Phobius (P) or SignalP (S) annotations, include: diff --git a/docs/source/user_guide/mapping.rst b/docs/source/user_guide/mapping.rst index c8abb0a7165ada019ac03c472f16a20819897c14..3da859fef8b2ef9aed66ba087e3e753b67a02879 100644 --- a/docs/source/user_guide/mapping.rst +++ b/docs/source/user_guide/mapping.rst @@ -14,9 +14,6 @@ the analysis. * - <databaseDirectory> - Path to the database root directory. - * - <genomeNumbers> - - A text file containing genome numbers to map reads against in - each line. * - <shortReadFiles> - One or two short-read archives in FASTQ format, which can be gz/bz2 compressed. @@ -30,6 +27,10 @@ the analysis. of cores or 8, whichever is lower. * - ``--output``/``-o`` - Path to the output files (default is the database path). + * - ``--include``/``-i`` + - Only include a selection of genomes. + * - ``--exclude``/``-e`` + - Exclude a selection of genomes. * - ``--best-hits`` = ``none|all|random`` - In case of multiple "best" hits, return none, all best hits or a random best hit (Default: random). @@ -99,22 +100,14 @@ the analysis. + EEEEEEEEEEEEEEEAEEEE/EEEEE/AEEEEEEEEEEEEEE/EE/EEE/<EEEEEEE/EEEEEEEEEEEEEAEEEEEAEEEEEAEEAEEEEEEA<AAAEEAEEA<EE/EEEEAEAEA/EEAA/ - Genome numbers file - - .. code:: text - - 1 - 2 - 5 - **Example commands** .. code:: bash - $ pantools map arabidopsis_DB genome_numbers.txt ERR031564_1.fastq - $ pantools map --include=1-5 --sensitivity=sensitive arabidopsis_DB genome_numbers.txt ERR031564_1.fastq - $ pantools map --competitive -m=all-bests arabidopsis_DB genome_numbers.txt ERR031564_1.fastq - $ pantools map --interleaved arabidopsis_DB genome_numbers.txt interleaved_reads.fastq - $ pantools map arabidopsis_DB genome_numbers.txt ERR031564_1.fastq ERR031564_2.fastq + $ pantools map arabidopsis_DB ERR031564_1.fastq + $ pantools map --include=1-5 --sensitivity=sensitive arabidopsis_DB ERR031564_1.fastq + $ pantools map --competitive -m=all-bests arabidopsis_DB ERR031564_1.fastq + $ pantools map --interleaved arabidopsis_DB interleaved_reads.fastq + $ pantools map arabidopsis_DB ERR031564_1.fastq ERR031564_2.fastq **Output** - **mapping_summary.txt**, number of mapped and unmapped reads per diff --git a/pom.xml b/pom.xml index 6097914c72911dfaaa78834b9ee711a0d8ac8b16..4b37ce257d8f000b1a432db8936be2e35d8d7f54 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ <groupId>nl.wur.bif</groupId> <artifactId>pantools</artifactId> - <version>4.2.1</version> + <version>4.2.2</version> <properties> <maven.compiler.source>8</maven.compiler.source> diff --git a/src/main/java/nl/wur/bif/pantools/cli/ANI.java b/src/main/java/nl/wur/bif/pantools/cli/ANI.java index 4f33521957c8135dd8c0e35864273e450393e32c..3c448968492c50f639dfe8680ef5d23eca7721ff 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/ANI.java +++ b/src/main/java/nl/wur/bif/pantools/cli/ANI.java @@ -19,7 +19,7 @@ import static picocli.CommandLine.*; /** * Calculate Average Nucleotide Identity (ANI) scores between genomes. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "ani", sortOptions = false) public class ANI implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/AddAnnotations.java b/src/main/java/nl/wur/bif/pantools/cli/AddAnnotations.java index d0496350a7330530db8deef7a7a9afd3d0163452..7fa5366aebe1b78d2e67cb0ec218bdbe1f7ef0a6 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/AddAnnotations.java +++ b/src/main/java/nl/wur/bif/pantools/cli/AddAnnotations.java @@ -16,7 +16,7 @@ import static picocli.CommandLine.*; /** * Adds genomic features to genomes and generates their proteomes. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "add_annotations", sortOptions = false) public class AddAnnotations implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/AddAntiSMASH.java b/src/main/java/nl/wur/bif/pantools/cli/AddAntiSMASH.java index 60cc9bef58ee0547e3d2428241d631718d4689ae..0c0fca2a77e96716c878536871d429cbedbad1c0 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/AddAntiSMASH.java +++ b/src/main/java/nl/wur/bif/pantools/cli/AddAntiSMASH.java @@ -16,7 +16,7 @@ import static nl.wur.bif.pantools.utils.Globals.*; /** * Include antiSMASH gene clusters into the pangenome. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "add_antismash", sortOptions = false) public class AddAntiSMASH implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/AddFunctions.java b/src/main/java/nl/wur/bif/pantools/cli/AddFunctions.java index a9cc98b965a6f2a44e87c788a25592318eace0ce..63cc1f5cf46b5ea3906053909a98dff6b7e5a03f 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/AddFunctions.java +++ b/src/main/java/nl/wur/bif/pantools/cli/AddFunctions.java @@ -16,7 +16,7 @@ import static picocli.CommandLine.*; /** * Add functional annotations to a pangenome. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "add_functions", sortOptions = false) public class AddFunctions implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/AddGenomes.java b/src/main/java/nl/wur/bif/pantools/cli/AddGenomes.java index 55b3a572e4e278ccde7a57b16176cafb93d7348f..1c2536e44e54f69e6752bd935840f40a3a147269 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/AddGenomes.java +++ b/src/main/java/nl/wur/bif/pantools/cli/AddGenomes.java @@ -16,7 +16,7 @@ import static picocli.CommandLine.Model.CommandSpec; /** * Include additional genomes to an already available pangenome. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "add_genomes", sortOptions = false) public class AddGenomes implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/AddPhenotypes.java b/src/main/java/nl/wur/bif/pantools/cli/AddPhenotypes.java index e862bbbbb294c04409116758720b51fda69315ed..ad3044b9d5472a1d3ad4926582b278f4b95d0578 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/AddPhenotypes.java +++ b/src/main/java/nl/wur/bif/pantools/cli/AddPhenotypes.java @@ -17,7 +17,7 @@ import static picocli.CommandLine.Model.CommandSpec; /** * Include phenotype data to the pangenome. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "add_phenotypes", sortOptions = false) public class AddPhenotypes implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/BuildPangenome.java b/src/main/java/nl/wur/bif/pantools/cli/BuildPangenome.java index 58aee792ddbf37b6e9ed6d0294a1c695bd7b3e9c..a1b97d30ed294502a29f0018589c99194f7867c4 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/BuildPangenome.java +++ b/src/main/java/nl/wur/bif/pantools/cli/BuildPangenome.java @@ -19,7 +19,7 @@ import static picocli.CommandLine.Model.CommandSpec; /** * Build a pangenome out of a set of genomes. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "build_pangenome", sortOptions = false) public class BuildPangenome implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/BuildPanproteome.java b/src/main/java/nl/wur/bif/pantools/cli/BuildPanproteome.java index 9e837df17446bc2d25b0e6c66907511434f631b4..91e27e5a108f0bc3d41f49509b35231a6bcb71b3 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/BuildPanproteome.java +++ b/src/main/java/nl/wur/bif/pantools/cli/BuildPanproteome.java @@ -16,7 +16,7 @@ import static picocli.CommandLine.Model.CommandSpec; /** * Build a panproteome out of a set of proteins. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "build_panproteome", sortOptions = false) public class BuildPanproteome implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/BuscoProtein.java b/src/main/java/nl/wur/bif/pantools/cli/BuscoProtein.java index 5ae9ceb556dce1bf6f7c04465cfe90945c838e7c..2b4645288e47dd182e931336969f4e4a77e6ec56 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/BuscoProtein.java +++ b/src/main/java/nl/wur/bif/pantools/cli/BuscoProtein.java @@ -22,7 +22,7 @@ import static picocli.CommandLine.Model.CommandSpec; /** * Identify BUSCO genes in the pangenome. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "busco_protein", sortOptions = false, abbreviateSynopsis = true) public class BuscoProtein implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/ChangeGrouping.java b/src/main/java/nl/wur/bif/pantools/cli/ChangeGrouping.java index 42a7aa87d08a9f866f5474f7666fe352558a68b1..66f2f70f2da41873c6cac3b626b5c73dae73fd1b 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/ChangeGrouping.java +++ b/src/main/java/nl/wur/bif/pantools/cli/ChangeGrouping.java @@ -16,7 +16,7 @@ import static picocli.CommandLine.Model.CommandSpec; /** * Change the active version of the homology grouping. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "change_grouping", sortOptions = false) public class ChangeGrouping implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/CompareGO.java b/src/main/java/nl/wur/bif/pantools/cli/CompareGO.java index 32aa1aba7672ce8a8f50463b1f1768087f5c538d..4f8dd270b407d8ec87758d063268703b58b5fa37 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/CompareGO.java +++ b/src/main/java/nl/wur/bif/pantools/cli/CompareGO.java @@ -21,7 +21,7 @@ import static picocli.CommandLine.*; /** * For two given GO terms, move up in the GO hierarchy to see if they are related. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "compare_go", sortOptions = false) public class CompareGO implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/ConsensusTree.java b/src/main/java/nl/wur/bif/pantools/cli/ConsensusTree.java index f67b732a88b2c3cb47dc75cabec3a0fcfeafba06..c2319a9e13ba0c70555aee4fcfc0919d1ea0903b 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/ConsensusTree.java +++ b/src/main/java/nl/wur/bif/pantools/cli/ConsensusTree.java @@ -18,7 +18,7 @@ import static picocli.CommandLine.*; /** * Create a consensus tree by combining gene trees from homology groups using ASTRAL-Pro. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "consensus_tree", sortOptions = false) public class ConsensusTree implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/CorePhylogeny.java b/src/main/java/nl/wur/bif/pantools/cli/CorePhylogeny.java index 620d6fb93580bb8a110b118c09c6e2c76e2a4e3a..0af39d68a869b876ffa5c3b544e47f8dd8dab33a 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/CorePhylogeny.java +++ b/src/main/java/nl/wur/bif/pantools/cli/CorePhylogeny.java @@ -21,7 +21,7 @@ import static picocli.CommandLine.*; /** * Create a SNP tree from single-copy genes. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "core_phylogeny", aliases = "core_snp_tree", sortOptions = false) public class CorePhylogeny implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/CoreUniqueThresholds.java b/src/main/java/nl/wur/bif/pantools/cli/CoreUniqueThresholds.java index bba3aa8fffc2f60ebda0d94ae11835ee05c434ab..0eaf10e84c87e429ae01002742f1287857171b73 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/CoreUniqueThresholds.java +++ b/src/main/java/nl/wur/bif/pantools/cli/CoreUniqueThresholds.java @@ -16,7 +16,7 @@ import static picocli.CommandLine.*; /** * Test the effect of changing the core and unique threshold. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "core_unique_thresholds", sortOptions = false) public class CoreUniqueThresholds implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/CreateTreeTemplate.java b/src/main/java/nl/wur/bif/pantools/cli/CreateTreeTemplate.java index 50f101f9c32bbff522c6bcfd872c38723a4098fe..381f2055b0e91cea9a9abd13e09d9495ecd2f394 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/CreateTreeTemplate.java +++ b/src/main/java/nl/wur/bif/pantools/cli/CreateTreeTemplate.java @@ -15,7 +15,7 @@ import static picocli.CommandLine.*; /** * Create templates for coloring phylogenetic trees in iTOL. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "create_tree_template", sortOptions = false) public class CreateTreeTemplate implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/ExportPangenome.java b/src/main/java/nl/wur/bif/pantools/cli/ExportPangenome.java index 92cae74ad52d18fd81aaa2afe20377658f910e4e..33b8341cdc71d7629e032a2fd38a9d67401326f4 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/ExportPangenome.java +++ b/src/main/java/nl/wur/bif/pantools/cli/ExportPangenome.java @@ -16,7 +16,7 @@ import static picocli.CommandLine.*; /** * Classify functional annotations as core, accessory or unique. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @CommandLine.Command(name = "export_pangenome", sortOptions = false, abbreviateSynopsis = true) public class ExportPangenome implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/FindGenesByAnnotation.java b/src/main/java/nl/wur/bif/pantools/cli/FindGenesByAnnotation.java index 20b30d7720f6c74b58091837c7367c7208ef902c..2cf4d42382cc36bbc0bd71c3d63aeb89e3a3b201 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/FindGenesByAnnotation.java +++ b/src/main/java/nl/wur/bif/pantools/cli/FindGenesByAnnotation.java @@ -21,7 +21,7 @@ import static picocli.CommandLine.*; * Find genes of interest in the pangenome that share a functional annotation node and extract the nucleotide and * protein sequence. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "find_genes_by_annotation", sortOptions = false) public class FindGenesByAnnotation implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/FindGenesByName.java b/src/main/java/nl/wur/bif/pantools/cli/FindGenesByName.java index d1d2235576cbee6ecef285136bea783230b1b5cf..1a2deba4580003869b7b731a7491094c9e067016 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/FindGenesByName.java +++ b/src/main/java/nl/wur/bif/pantools/cli/FindGenesByName.java @@ -17,7 +17,7 @@ import static picocli.CommandLine.*; /** * Find your genes of interest in the pangenome by using the gene name and extract the nucleotide and protein sequence. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "find_genes_by_name", sortOptions = false) public class FindGenesByName implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/FindGenesInRegion.java b/src/main/java/nl/wur/bif/pantools/cli/FindGenesInRegion.java index 12e143d22b0e3f65ab3964bc707df6d39d424ca6..0c715ce6fb79ac018bb19be8b3aad37244f5d162 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/FindGenesInRegion.java +++ b/src/main/java/nl/wur/bif/pantools/cli/FindGenesInRegion.java @@ -16,7 +16,7 @@ import static picocli.CommandLine.*; /** * Find genes in a given genomic region. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "find_genes_in_region", sortOptions = false) public class FindGenesInRegion implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/FunctionOverview.java b/src/main/java/nl/wur/bif/pantools/cli/FunctionOverview.java index 99e5ef9d3072ee2ef89780acc084aa6b74e33ee5..3b8f60853ae504ed3d9c496a7ca3c12d78573de4 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/FunctionOverview.java +++ b/src/main/java/nl/wur/bif/pantools/cli/FunctionOverview.java @@ -17,7 +17,7 @@ import static picocli.CommandLine.*; /** * Create an overview table for each functional annotation type in the pangenome. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "function_overview", sortOptions = false) public class FunctionOverview implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/FunctionalClassification.java b/src/main/java/nl/wur/bif/pantools/cli/FunctionalClassification.java index 9b09de7de7e1b805e7e2650191eb1c8db1786e44..130d284630a753bb12d9d38cb66c0acf648b5e28 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/FunctionalClassification.java +++ b/src/main/java/nl/wur/bif/pantools/cli/FunctionalClassification.java @@ -19,7 +19,7 @@ import static picocli.CommandLine.*; /** * Classify functional annotations as core, accessory or unique. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "functional_classification", sortOptions = false, abbreviateSynopsis = true) public class FunctionalClassification implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/GOEnrichment.java b/src/main/java/nl/wur/bif/pantools/cli/GOEnrichment.java index 29e31b6ff892a1751c5ebeb71584822a66631110..f0e2b9c0262d268aa4be1a74407f2fc37ee259de 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/GOEnrichment.java +++ b/src/main/java/nl/wur/bif/pantools/cli/GOEnrichment.java @@ -22,7 +22,7 @@ import static picocli.CommandLine.Model.CommandSpec; /** * Identify over or underrepresented GO terms in a set of genes. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "go_enrichment", sortOptions = false) public class GOEnrichment implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/GeneClassification.java b/src/main/java/nl/wur/bif/pantools/cli/GeneClassification.java index 595d0c0473fa3cef93b97e95b9f85d833956f5f4..c10fc40f7fa3f33e87d9773fdcd8242f00bae8c5 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/GeneClassification.java +++ b/src/main/java/nl/wur/bif/pantools/cli/GeneClassification.java @@ -17,7 +17,7 @@ import static picocli.CommandLine.Model.CommandSpec; /** * Classify the gene repertoire as core, accessory or unique. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "gene_classification", sortOptions = false, abbreviateSynopsis = true) public class GeneClassification implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/Group.java b/src/main/java/nl/wur/bif/pantools/cli/Group.java index 33a6dd6b4a9e70150d1896360cb78fad0fa66fdc..0339c167b5a5e0f7c46af0cc20666dee47699bc8 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/Group.java +++ b/src/main/java/nl/wur/bif/pantools/cli/Group.java @@ -23,7 +23,7 @@ import static picocli.CommandLine.Model.CommandSpec; /** * Generate homology groups based on similarity of protein sequences. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "group", sortOptions = false, abbreviateSynopsis = true) public class Group implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/GroupInfo.java b/src/main/java/nl/wur/bif/pantools/cli/GroupInfo.java index 690bd30cc957a4969277124ed797d872689e47eb..281493724fe070026ae64c138237886a467e168f 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/GroupInfo.java +++ b/src/main/java/nl/wur/bif/pantools/cli/GroupInfo.java @@ -18,14 +18,14 @@ import static picocli.CommandLine.*; /** * Report all available information of one or multiple homology groups. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "group_info", sortOptions = false) public class GroupInfo implements Callable<Integer> { @Spec Model.CommandSpec spec; @ArgGroup private SelectGenomes selectGenomes; - @ArgGroup(multiplicity = "1") private SelectHmGroups selectHmGroups; + @ArgGroup private SelectHmGroups selectHmGroups = new SelectHmGroups(); @ParentCommand @GraphDatabase @@ -61,7 +61,6 @@ public class GroupInfo implements Callable<Integer> { private void setGlobalParameters() throws IOException { setGenomeSelectionOptions(selectGenomes); -// SELECTED_HMGROUPS = selectHmGroups.getHomologyGroups().stream().map(String::valueOf).collect(Collectors.joining(",")); if (genes != null) SELECTED_NAME = genes.toString().replaceAll("[\\[\\]]", ""); if (functions != null) SELECTED_LABEL = functions.toString().replaceAll("[\\[\\]]", ""); diff --git a/src/main/java/nl/wur/bif/pantools/cli/GroupingOverview.java b/src/main/java/nl/wur/bif/pantools/cli/GroupingOverview.java index b98e32882ac32b6c35c1e2a73ffc5a6d66af7fae..b42b4183e3ca3ce846ed6aa67fd9ab7718a0854c 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/GroupingOverview.java +++ b/src/main/java/nl/wur/bif/pantools/cli/GroupingOverview.java @@ -15,7 +15,7 @@ import static picocli.CommandLine.*; /** * Create an overview table for every homology grouping in the pangenome. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "grouping_overview", sortOptions = false) public class GroupingOverview implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/KmerClassification.java b/src/main/java/nl/wur/bif/pantools/cli/KmerClassification.java index fc1b2156d8e5a202c05874c5f6d9c02cad4d21b4..b6e967fed511f77dff8b8bf737c4e97039eafa66 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/KmerClassification.java +++ b/src/main/java/nl/wur/bif/pantools/cli/KmerClassification.java @@ -17,7 +17,7 @@ import static picocli.CommandLine.Model.CommandSpec; /** * Calculate the number of core, accessory, unique, k-mer sequences. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "k_mer_classification", aliases = "kmer_classification", sortOptions = false, abbreviateSynopsis = true) public class KmerClassification implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/LocateGenes.java b/src/main/java/nl/wur/bif/pantools/cli/LocateGenes.java index 1577737f69458112f01b10dd4f17565035da722b..398cfe7f3a7e4579f1c15ef8a94daa01fa3eb47f 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/LocateGenes.java +++ b/src/main/java/nl/wur/bif/pantools/cli/LocateGenes.java @@ -21,7 +21,7 @@ import static picocli.CommandLine.*; /** * Identify and compare gene clusters of from a set of homology groups. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "locate_genes", sortOptions = false, abbreviateSynopsis = true) public class LocateGenes implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/MLSA.java b/src/main/java/nl/wur/bif/pantools/cli/MLSA.java index 517de6278fed9bab444b8f5835410b7132943fc7..0b6410e040900cf55301215a25dd8efacce0727c 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/MLSA.java +++ b/src/main/java/nl/wur/bif/pantools/cli/MLSA.java @@ -16,7 +16,7 @@ import static picocli.CommandLine.*; /** * Step 3/3 of MLSA. Run IQ-tree on the concatenated sequences. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "mlsa", sortOptions = false) public class MLSA implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/MLSAConcatenate.java b/src/main/java/nl/wur/bif/pantools/cli/MLSAConcatenate.java index 14beadae095b88939f6104f8a62e4f48a655074a..0a96cc63e384a50343ad878483f20e425e31d1ca 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/MLSAConcatenate.java +++ b/src/main/java/nl/wur/bif/pantools/cli/MLSAConcatenate.java @@ -18,7 +18,7 @@ import static picocli.CommandLine.*; /** * Step 2/3 of MLSA. Concatenate the gene selection into a single continuous sequence. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "mlsa_concatenate", sortOptions = false) public class MLSAConcatenate implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/MLSAFindGenes.java b/src/main/java/nl/wur/bif/pantools/cli/MLSAFindGenes.java index 438c30a323b1b45393522feead99739177160dcd..4438c2eb23a1ff4dfd574175a724ce05d878bba2 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/MLSAFindGenes.java +++ b/src/main/java/nl/wur/bif/pantools/cli/MLSAFindGenes.java @@ -17,7 +17,7 @@ import static picocli.CommandLine.Model.CommandSpec; /** * Step 1/3 of MLSA. Search and filter suitable genes for the MLSA. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "mlsa_find_genes", sortOptions = false) public class MLSAFindGenes implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/MSA.java b/src/main/java/nl/wur/bif/pantools/cli/MSA.java index 6936fdacbdd6c49000810621e479b865688e1f73..18911e030ea26b075faae8e4346960fa57375772 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/MSA.java +++ b/src/main/java/nl/wur/bif/pantools/cli/MSA.java @@ -26,7 +26,7 @@ import static picocli.CommandLine.*; /** * Create multiple sequence alignments. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "msa", sortOptions = false, abbreviateSynopsis = true) public class MSA implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/Map.java b/src/main/java/nl/wur/bif/pantools/cli/Map.java index de801d48c490856032280c08e84b877eab09f710..8d58dec14a1fd7cf0fbb1f79ed1b994bb361f4be 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/Map.java +++ b/src/main/java/nl/wur/bif/pantools/cli/Map.java @@ -1,6 +1,7 @@ package nl.wur.bif.pantools.cli; import jakarta.validation.constraints.*; +import nl.wur.bif.pantools.cli.mixins.SelectGenomes; import nl.wur.bif.pantools.cli.mixins.ThreadNumber; import nl.wur.bif.pantools.utils.BeanUtils; import nl.wur.bif.pantools.pantools.Pantools; @@ -19,7 +20,7 @@ import static picocli.CommandLine.*; /** * Map single or paired-end short reads to one or multiple genomes in the pangenome. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "map", sortOptions = false, abbreviateSynopsis = true) @OutputDirectory(directory = "outDirectory") @@ -27,15 +28,12 @@ public class Map implements Callable<Integer> { @Spec static CommandSpec spec; @Mixin private ThreadNumber threadNumber; + @ArgGroup private SelectGenomes selectGenomes; @ParentCommand @GraphDatabase private Pantools pantools; - @Parameters(descriptionKey = "genome-numbers", index = "0+") - @InputFile(message = "{file.genome-numbers}") - Path genomeNumbers; - @Parameters(descriptionKey = "short-read-file", index = "1+", arity = "1..2") @InputFiles(message = "{file.short-read}") Path[] shortReadFiles; @@ -204,7 +202,7 @@ public class Map implements Callable<Integer> { } private void setGlobalParameters() { - PATH_TO_THE_GENOME_NUMBERS_FILE = genomeNumbers.toString(); + setGenomeSelectionOptions(selectGenomes); PATH_TO_THE_FIRST_SRA = shortReadFiles[0].toString(); if (shortReadFiles.length == 2) PATH_TO_THE_SECOND_SRA = shortReadFiles[1].toString(); if (previousRunFile != null) RAW_ABUNDANCE_FILE = previousRunFile.toString(); diff --git a/src/main/java/nl/wur/bif/pantools/cli/Metrics.java b/src/main/java/nl/wur/bif/pantools/cli/Metrics.java index 8920c56d4adc7e19b32d46952febf3b939773e8c..bc1ac60b3097585214d953efb5bb592d58d0d780 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/Metrics.java +++ b/src/main/java/nl/wur/bif/pantools/cli/Metrics.java @@ -17,7 +17,7 @@ import static picocli.CommandLine.*; /** * Generates relevant metrics of the pangenome and the individual genomes and sequences. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "metrics", sortOptions = false) public class Metrics implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/MoveGrouping.java b/src/main/java/nl/wur/bif/pantools/cli/MoveGrouping.java index 1d5c1607cd709760d97c6d386a22c8f77a5f838e..79972424823be3fe9927eaa8a574b235d9d4bfcd 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/MoveGrouping.java +++ b/src/main/java/nl/wur/bif/pantools/cli/MoveGrouping.java @@ -15,7 +15,7 @@ import static picocli.CommandLine.*; /** * Inactivate the currently active homology grouping. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "move_grouping", sortOptions = false) public class MoveGrouping implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/OptimalGrouping.java b/src/main/java/nl/wur/bif/pantools/cli/OptimalGrouping.java index aa8991ffef07dace734575dfe37a9021c285c83b..c22de667a8816c8414422791f17686cbffd4581b 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/OptimalGrouping.java +++ b/src/main/java/nl/wur/bif/pantools/cli/OptimalGrouping.java @@ -22,7 +22,7 @@ import static picocli.CommandLine.Model.CommandSpec; /** * Find the most suitable settings for <Group>. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "optimal_grouping", sortOptions = false) public class OptimalGrouping implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/OrderMatrix.java b/src/main/java/nl/wur/bif/pantools/cli/OrderMatrix.java index 14a7e6d970e31555dce447a6cdfe73872f281474..2e457a84f2264ef4b150cf976d4b0e352cf3a748 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/OrderMatrix.java +++ b/src/main/java/nl/wur/bif/pantools/cli/OrderMatrix.java @@ -17,7 +17,7 @@ import static picocli.CommandLine.*; /** * Order the values of a matrix file created by PanTools. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "order_matrix", sortOptions = false) public class OrderMatrix implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/PangenomeStructure.java b/src/main/java/nl/wur/bif/pantools/cli/PangenomeStructure.java index d381296070f98214bf3033a9a2407f53a84e211c..11ebfe0ef6bfe87749b1ce235fcf275485307917 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/PangenomeStructure.java +++ b/src/main/java/nl/wur/bif/pantools/cli/PangenomeStructure.java @@ -18,7 +18,7 @@ import static picocli.CommandLine.*; /** * Determine the openness of the pangenome based on k-mer sequences. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "pangenome_structure", sortOptions = false, abbreviateSynopsis = true) public class PangenomeStructure implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/RemoveAnnotations.java b/src/main/java/nl/wur/bif/pantools/cli/RemoveAnnotations.java index 7048f0b4d6bb42d365654df80f35683632e24817..3ce09ffd9c04550ade6d3513945714b33f377ba9 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/RemoveAnnotations.java +++ b/src/main/java/nl/wur/bif/pantools/cli/RemoveAnnotations.java @@ -17,7 +17,7 @@ import static picocli.CommandLine.Model.CommandSpec; /** * Remove all the genomic features that belong to annotations. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "remove_annotations", sortOptions = false) public class RemoveAnnotations implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/RemoveFunctions.java b/src/main/java/nl/wur/bif/pantools/cli/RemoveFunctions.java index c2e2d33845a9a1f5b5bdeb83ba474f0b7bcdd523..85ff4a7efa819c03fb01b1e9e07666c7f65ab173 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/RemoveFunctions.java +++ b/src/main/java/nl/wur/bif/pantools/cli/RemoveFunctions.java @@ -16,7 +16,7 @@ import static picocli.CommandLine.*; /** * Remove an homology grouping from the pangenome. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "remove_functions", sortOptions = false) public class RemoveFunctions implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/RemoveGrouping.java b/src/main/java/nl/wur/bif/pantools/cli/RemoveGrouping.java index 463b9bd74d82b877336d76acef8fad9110250f02..ab70a0e87b6482d844a1ee0916522e38b6def5e0 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/RemoveGrouping.java +++ b/src/main/java/nl/wur/bif/pantools/cli/RemoveGrouping.java @@ -16,7 +16,7 @@ import static picocli.CommandLine.*; /** * Remove an homology grouping from the pangenome. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "remove_grouping", sortOptions = false) public class RemoveGrouping implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/RemoveNodes.java b/src/main/java/nl/wur/bif/pantools/cli/RemoveNodes.java index 63239c9ea608e1dd9a1cf843bfc7b71814da33c8..cb94da121231f7719967fe4dc76ce38479799202 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/RemoveNodes.java +++ b/src/main/java/nl/wur/bif/pantools/cli/RemoveNodes.java @@ -19,7 +19,7 @@ import static picocli.CommandLine.Model.CommandSpec; /** * Remove a selection of nodes and their relationships from the pangenome. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "remove_nodes", sortOptions = false) @ExcludePatterns(regexp = "nucleotide,sequence,pangenome,genome,degenerate", field = "label", diff --git a/src/main/java/nl/wur/bif/pantools/cli/RemovePhenotypes.java b/src/main/java/nl/wur/bif/pantools/cli/RemovePhenotypes.java index 056370da0d95175882294b495cb391bcb998b737..3c6202eef8553a3903c6071be00ebe798fd36e78 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/RemovePhenotypes.java +++ b/src/main/java/nl/wur/bif/pantools/cli/RemovePhenotypes.java @@ -15,7 +15,7 @@ import static picocli.CommandLine.*; /** * Delete phenotype nodes or remove specific phenotype information from the nodes. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "remove_phenotypes", sortOptions = false) public class RemovePhenotypes implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/RenameMatrix.java b/src/main/java/nl/wur/bif/pantools/cli/RenameMatrix.java index f022b53501a33e44f01b241d5ece7980f1c5742f..c3d182daea437736825c4d3ae25942e6970af536 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/RenameMatrix.java +++ b/src/main/java/nl/wur/bif/pantools/cli/RenameMatrix.java @@ -17,7 +17,7 @@ import static picocli.CommandLine.*; /** * Rename the headers of CSV formatted matrix files. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "rename_matrix", sortOptions = false) public class RenameMatrix implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/RenamePhylogeny.java b/src/main/java/nl/wur/bif/pantools/cli/RenamePhylogeny.java index bbd9ea27cc7b44f2a9477416b85ea41c3eba13c2..cd81156b7472c21dbce03c8379b3c91f8646e4a4 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/RenamePhylogeny.java +++ b/src/main/java/nl/wur/bif/pantools/cli/RenamePhylogeny.java @@ -16,7 +16,7 @@ import static picocli.CommandLine.Model.CommandSpec; /** * Update or alter the terminal nodes (leaves) of a phylogenic tree. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "rename_phylogeny") public class RenamePhylogeny implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/RetrieveFeatures.java b/src/main/java/nl/wur/bif/pantools/cli/RetrieveFeatures.java index ed09de21fa7292126eacb8bfa1ca62d1ccfd4f2f..79139cfee8dded276332ba5b8edcf701ece2547c 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/RetrieveFeatures.java +++ b/src/main/java/nl/wur/bif/pantools/cli/RetrieveFeatures.java @@ -15,7 +15,7 @@ import static picocli.CommandLine.*; /** * Retrieve the sequence of annotated features from the pangenome. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "retrieve_features", sortOptions = false) public class RetrieveFeatures implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/RetrieveRegions.java b/src/main/java/nl/wur/bif/pantools/cli/RetrieveRegions.java index db50570ab9f217b7b9050cc30ffa8ed4883e709c..31867590e399948c77fdf8f216b7626034fabc28 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/RetrieveRegions.java +++ b/src/main/java/nl/wur/bif/pantools/cli/RetrieveRegions.java @@ -17,7 +17,7 @@ import static picocli.CommandLine.*; /** * Retrieve the sequence of genomic regions from the pangenome. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "retrieve_regions", sortOptions = false) public class RetrieveRegions implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/RootPhylogeny.java b/src/main/java/nl/wur/bif/pantools/cli/RootPhylogeny.java index d2b7a05676fcbaf972e92a3ff0d295ad8a6292a3..1cd092fb7acdc6ba37c45278f5ec78b8909f47b5 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/RootPhylogeny.java +++ b/src/main/java/nl/wur/bif/pantools/cli/RootPhylogeny.java @@ -16,7 +16,7 @@ import static picocli.CommandLine.Model.CommandSpec; /** * (Re)root a phylogenetic tree. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "root_phylogeny", sortOptions = false) public class RootPhylogeny implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/ShowGO.java b/src/main/java/nl/wur/bif/pantools/cli/ShowGO.java index 97bf8b9303aadaf2c15d799dc84b91ad73c5184b..e0babec840fd7dcb32a1e9ecfca0c7a87f0fe68d 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/ShowGO.java +++ b/src/main/java/nl/wur/bif/pantools/cli/ShowGO.java @@ -19,7 +19,7 @@ import static picocli.CommandLine.*; /** * For a given GO term, show the child terms, all parent terms higher in the hierarchy, and connected mRNA nodes. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ @Command(name = "show_go", sortOptions = false) public class ShowGO implements Callable<Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/mixins/SelectGenomes.java b/src/main/java/nl/wur/bif/pantools/cli/mixins/SelectGenomes.java index 402adeb0cd7591c429fc08682a0af5c1354fc4c1..8f05de2be09913ff0e847d8699c36ae86c61e695 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/mixins/SelectGenomes.java +++ b/src/main/java/nl/wur/bif/pantools/cli/mixins/SelectGenomes.java @@ -12,7 +12,7 @@ import static picocli.CommandLine.*; * Argument group (mutually exclusive) for '--include' and '--exclude' options. Accepts lists (1,2,3), ranges (5-10) * or a combination (1,2,3,5-10) as user input for both options. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ public class SelectGenomes { diff --git a/src/main/java/nl/wur/bif/pantools/cli/mixins/ThreadNumber.java b/src/main/java/nl/wur/bif/pantools/cli/mixins/ThreadNumber.java index e2cc63504a77358df4db40a44055fdb919b95726..ab92b161d59016d1e3908679c02890a962f05c01 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/mixins/ThreadNumber.java +++ b/src/main/java/nl/wur/bif/pantools/cli/mixins/ThreadNumber.java @@ -7,7 +7,7 @@ import static picocli.CommandLine.Option; /** * Mixin class for '--threads' option. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ public class ThreadNumber { diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/Constraints.java b/src/main/java/nl/wur/bif/pantools/cli/validation/Constraints.java index 0d28c9bf9f58c2db431a9999b0236fb7c678699d..bb1fb05d05c9fb5831765d55acab8de6f0efbd23 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/validation/Constraints.java +++ b/src/main/java/nl/wur/bif/pantools/cli/validation/Constraints.java @@ -12,7 +12,7 @@ import static java.lang.annotation.RetentionPolicy.RUNTIME; /** * Class containing custom Bean validation constraint interfaces. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ public class Constraints { diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/ExcludePatternsValidator.java b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/ExcludePatternsValidator.java index 67f5d60723abe58ade236fa773d0b0b3212e5d62..b4fb78b6e41dca3a9d07bbe1e9f854db8cde3a45 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/ExcludePatternsValidator.java +++ b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/ExcludePatternsValidator.java @@ -20,7 +20,7 @@ import static java.util.regex.Pattern.compile; * Custom ConstraintValidator that verifies whether an input string matches 'forbidden' words. Allows queries to enable * yes|no user input for continuing after encountering a forbidden word. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ public class ExcludePatternsValidator implements ConstraintValidator<ExcludePatterns, Object> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/InputDirectoryValidator.java b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/InputDirectoryValidator.java index 9255ed33175ad677ad354fc1733f12358c7bdf14..90544d58425f5d5a87a7901fbf6e44964ccae26d 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/InputDirectoryValidator.java +++ b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/InputDirectoryValidator.java @@ -10,7 +10,7 @@ import java.nio.file.Path; /** * Custom ConstraintValidator that verifies that an input directory exists and is a directory. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ public class InputDirectoryValidator implements ConstraintValidator<InputDirectory, Path> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/InputFileValidator.java b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/InputFileValidator.java index e4b2aa7cf441ff14a35f62e1040e2c8a028ed4cd..02394c2081dd5431ba6c11c9110d4f191dac4f70 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/InputFileValidator.java +++ b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/InputFileValidator.java @@ -12,7 +12,7 @@ import static nl.wur.bif.pantools.utils.BeanUtils.setViolationMessage; /** * Custom ConstraintValidator that verifies that an input file exists. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ public class InputFileValidator implements ConstraintValidator<InputFile, Path> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/InputFilesValidator.java b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/InputFilesValidator.java index 469f18845db789beee0fa366fdbe00ccc4d8019d..df3415f92e9cb442282f81e4874318d8e1b02f2e 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/InputFilesValidator.java +++ b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/InputFilesValidator.java @@ -12,7 +12,7 @@ import static nl.wur.bif.pantools.utils.BeanUtils.setViolationMessage; /** * Custom ConstraintValidator that verifies that all files in an array of input file exist. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ public class InputFilesValidator implements ConstraintValidator<InputFiles, Path[]> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/MatchIntegerValidator.java b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/MatchIntegerValidator.java index 6aee85b0589ba1647ac7bc91f953b46d54784cf7..c8a11b7867ae47a4b20cf04eb01d536bef2e7db6 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/MatchIntegerValidator.java +++ b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/MatchIntegerValidator.java @@ -9,7 +9,7 @@ import jakarta.validation.ConstraintValidatorContext; /** * Custom ConstraintValidator that matches an input integer to a selection of allowed options. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ public class MatchIntegerValidator implements ConstraintValidator<MatchInteger, Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/MinOrZeroValidator.java b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/MinOrZeroValidator.java index 1a6ae670fb45b84ae66b9291c170f3d1438a20ba..6dacce9ccc6c38c640075bd702aa9d301a0a20c6 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/MinOrZeroValidator.java +++ b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/MinOrZeroValidator.java @@ -8,7 +8,7 @@ import jakarta.validation.ConstraintValidatorContext; /** * Custom version of the @Min constraint for optional integers that allows 0 as a valid option. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ public class MinOrZeroValidator implements ConstraintValidator<MinOrZero, Integer> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/OutputDirectoryValidator.java b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/OutputDirectoryValidator.java index 476f380ac4d848cb4f0b07ab5399df3b8f3d634c..0f4ec676e7095841b39353cf247925e71410595c 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/OutputDirectoryValidator.java +++ b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/OutputDirectoryValidator.java @@ -18,7 +18,7 @@ import static nl.wur.bif.pantools.cli.validation.Constraints.*; * Can validate the @ParentCommand decorated field 'pantools' when the given directory is 'databaseDirectory'. * The subcommand needs getters for the Pantools parent class and the output directory path. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ public class OutputDirectoryValidator implements ConstraintValidator<OutputDirectory, Object> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/PantoolsDatabaseValidator.java b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/PantoolsDatabaseValidator.java index 1a8ec1c6a9751c9ffa05398121920705d1d577c0..6f2969b1583b991d2e83768eead558ec3be84a69 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/PantoolsDatabaseValidator.java +++ b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/PantoolsDatabaseValidator.java @@ -15,7 +15,7 @@ import static nl.wur.bif.pantools.utils.BeanUtils.setViolationMessage; /** * Custom ConstraintValidator that validates that the database directory exists and has an existing neo4j graph database. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ public class PantoolsDatabaseValidator implements ConstraintValidator<GraphDatabase, Pantools> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/PatternListValidator.java b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/PatternListValidator.java index fe18888e36356e6fe5162613d638283f73669683..d8c1f8e57e70d9782b4af9f0d7519028fcc8c41e 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/PatternListValidator.java +++ b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/PatternListValidator.java @@ -15,7 +15,7 @@ import static nl.wur.bif.pantools.cli.validation.Constraints.*; /** * Custom version of @Pattern constraint that matches a list of strings with a regular expression. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ public class PatternListValidator implements ConstraintValidator<Patterns, List<String>> { diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/PatternMapValidator.java b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/PatternMapValidator.java index 0604a36c6679f5dbfd4720fd7c7fc23633662d14..b0be86928710e5ab5064cc62435997f65cbaa5e0 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/PatternMapValidator.java +++ b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/PatternMapValidator.java @@ -15,7 +15,7 @@ import static nl.wur.bif.pantools.cli.validation.Constraints.Patterns; /** * Custom version of @Pattern constraint that matches a list of strings with a regular expression that matches a keyset of strings. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ public class PatternMapValidator implements ConstraintValidator<Patterns, HashMap<String, String>> { diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/AnnotationLayer.java b/src/main/java/nl/wur/bif/pantools/pangenome/AnnotationLayer.java index 8c24b390ab767eadb51b8d3f7994ef5f764784a6..810c5452678434d535fea5eb0b9977955edb88c5 100755 --- a/src/main/java/nl/wur/bif/pantools/pangenome/AnnotationLayer.java +++ b/src/main/java/nl/wur/bif/pantools/pangenome/AnnotationLayer.java @@ -1515,11 +1515,8 @@ public class AnnotationLayer { Pantools.logger.warn("Allowed features: gene, coding_gene, mRNA, CDS, exon, intron, tRNA, rRNA"); } - // Verify if the genomes were correctly selected - if (PATH_TO_THE_GENOME_NUMBERS_FILE != null && (skip_genomes != null || target_genome != null)) { - Pantools.logger.error("Select genomes via --feature-type, --skip or --reference. Using multiple arguments is not allowed."); - throw new RuntimeException("Too many genome selection arguments"); - } else if (PATH_TO_THE_GENOME_NUMBERS_FILE == null && skip_genomes == null && target_genome == null) { + // Verify if the genomes were correctly selected + if (skip_genomes == null && target_genome == null) { Pantools.logger.error("Missing a genome selection via --feature-type, --skip or --reference. Using multiple arguments is not allowed."); throw new RuntimeException("Missing genome selection argument"); } @@ -1543,13 +1540,6 @@ public class AnnotationLayer { // When using --label, the array is prepared below BufferedWriter[] out = new BufferedWriter[GENOME_DB.num_genomes]; - if (PATH_TO_THE_GENOME_NUMBERS_FILE != null) { // skip_array is prepared - skip_array = new boolean[total_genomes]; - TreeSet<Integer> genome_numbers_set = retrieve_numbers_from_genome_numbers_file(); - for (int i = 1; i <= total_genomes; i++) { - skip_array[i-1] = !genome_numbers_set.contains(i); // sets true or false in array - } - } try { // start the file writers for (int i = 0; i < total_genomes; i++) { // i+1 is the genome number @@ -1559,7 +1549,7 @@ public class AnnotationLayer { out[i] = new BufferedWriter(new FileWriter(WORKING_DIRECTORY + "retrieval/features/" + SELECTED_LABEL + "s_" + (i+1) + ".fasta")); } } catch (IOException ioe) { - Pantools.logger.error("Failed to read: {}", PATH_TO_THE_GENOME_NUMBERS_FILE); + Pantools.logger.error("Failed to read fasta file."); throw new RuntimeException("File error"); } @@ -1605,36 +1595,6 @@ public class AnnotationLayer { Pantools.logger.info("Output written to:"); Pantools.logger.info("{}retrieval/features/", WORKING_DIRECTORY); } - - /** - * Read --genome-numbers or -gn. Places the numbers in a Set that automatically sorts the numbers in ascending order - * @return - */ - public TreeSet<Integer> retrieve_numbers_from_genome_numbers_file() { - String line; - TreeSet<Integer> genome_numbers = new TreeSet<>(); - try { - BufferedReader in; in = new BufferedReader(new FileReader(PATH_TO_THE_GENOME_NUMBERS_FILE)); - while ((line = in.readLine()) != null) { - line = line.trim(); - if (line.equals("")) { - continue; - } - try { - int genome = Integer.parseInt(line.trim()); - genome_numbers.add(genome); - } catch(NumberFormatException e) { - Pantools.logger.info("{}is not a valid genome number.", line.trim()); - } - - } - in.close(); - } catch (IOException e) { - Pantools.logger.error(e.getMessage()); - System.exit(1); - } - return genome_numbers; - } /** * Translates a transcript to a protein sequence diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java b/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java index 50f4dd70dcc0a1b3211ecad5a9ffd7855aac9ab0..967764310143dfe15aec9837e721d5fbccfa501a 100644 --- a/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java +++ b/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java @@ -1036,8 +1036,9 @@ public class Classification { * Optional * -label */ - public void homology_group_info(List<Long> homologyGroups) throws RuntimeException { + public void homology_group_info(List<Long> homologyGroups) throws RuntimeException, IOException { Pantools.logger.info("Reporting all information from selected homology groups."); + StringBuilder output_builder = new StringBuilder(); HashSet<String> groups_with_function = new HashSet<>(); HashMap<String, HashSet<String>> groups_with_function_or_name = new HashMap<>(); @@ -8543,7 +8544,7 @@ public class Classification { if (gene_or_kmer.equals("kmer")) { rscript.append(create_pangenome_size_rscript_kmer("")); - rscript.append(create_pangenome_size_rscript_kmer("unique")); + rscript.append(create_pangenome_size_rscript_kmer("_unique")); } else if (gene_or_kmer.equals("gene")) { rscript.append(create_pangenome_size_rscript_gene()); } else { diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/GenomeLayer.java b/src/main/java/nl/wur/bif/pantools/pangenome/GenomeLayer.java index da46bd2747c81a85edf4d22adb12cac94059c912..b1fb25ec235b1b795876bb562e3258482e30f325 100755 --- a/src/main/java/nl/wur/bif/pantools/pangenome/GenomeLayer.java +++ b/src/main/java/nl/wur/bif/pantools/pangenome/GenomeLayer.java @@ -33,6 +33,7 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.zip.GZIPInputStream; import static nl.wur.bif.pantools.pangenome.Classification.genome_overview; +import static nl.wur.bif.pantools.pangenome.create_skip_arrays.create_skip_arrays; import static nl.wur.bif.pantools.utils.Globals.*; import static nl.wur.bif.pantools.utils.Utils.complement; import static nl.wur.bif.pantools.utils.Utils.*; @@ -1739,6 +1740,7 @@ public class GenomeLayer { OUTPUT_PATH += "read_mapping/"; new File(WORKING_DIRECTORY + "read_mapping").mkdir(); } + create_skip_arrays(false, true); report_number_of_threads(); int i, j, t, genome; Node pangenome_node; @@ -1754,13 +1756,6 @@ public class GenomeLayer { paired = PATH_TO_THE_SECOND_SRA != null; if (INTERLEAVED) { paired = true; // both read pairs are stored in the same file - } - if (PATH_TO_THE_GENOME_NUMBERS_FILE == null && target_genome == null) { - Pantools.logger.error("No genomes were selected to align to."); - System.exit(1); - } else if (PATH_TO_THE_GENOME_NUMBERS_FILE != null && target_genome != null) { - Pantools.logger.error("Include ONLY genome number using a file with --genome-number OR provide the genome number with --reference."); //TODO: this should be impossible - System.exit(1); } FastqReader[] reader = new FastqReader[2]; @@ -1899,66 +1894,20 @@ public class GenomeLayer { public ArrayList<Integer>[] retrieve_genomes_to_map_against() { ArrayList<Integer>[] genome_numbers; genome_numbers = new ArrayList[THREADS]; - int t, number, n = 0; - String line; - BufferedReader in; + int t, genome_nr; for (t = 0; t < THREADS; ++t) { genome_numbers[t] = new ArrayList(); } - if (PATH_TO_THE_GENOME_NUMBERS_FILE != null) { // --genome-numbers was included - try { - in = new BufferedReader(new FileReader(PATH_TO_THE_GENOME_NUMBERS_FILE)); - for (n = 0; (line = in.readLine()) != null; ) { - line = line.trim(); - if (line.equals("")) - continue; - number = Integer.parseInt(line); - if (number > 0 && number <= GENOME_DB.num_genomes) { - for (t = 0; t < THREADS; ++t) { - genome_numbers[t].add(number); - } - num_shared_mapping[number] = new AtomicLong(0); - num_unique_mapping[number] = new AtomicLong(0); - num_unmapped[number] = new AtomicLong(0); - ++n; - } else { - Pantools.logger.warn("Genome {} is not found in the database.", number); - } - } - in.close(); - } catch (Exception ex) { - Pantools.logger.warn("Error in reading genome numbers."); - } - } else { // --reference was included - target_genome = target_genome.replace(" ",""); - if (target_genome.endsWith(",")) { - target_genome = target_genome.replaceFirst(".$",""); // remove last character - } - String[] temp_target_array = target_genome.split(","); - ArrayList<Integer> target_genome_list = new ArrayList<>(); - for (String genome_str : temp_target_array) { - if (genome_str.contains("-")) { - String[] genome_array = genome_str.split("-"); - int start = Integer.parseInt(genome_array[0]); - int end = Integer.parseInt(genome_array[1]); - for (int i= start; i <= end; i++) { - target_genome_list.add(i); - } - } else { - target_genome_list.add(Integer.parseInt(genome_str)); - } - } - for (int genome_nr : target_genome_list) { - if (genome_nr > 0 && genome_nr <= GENOME_DB.num_genomes) { - for (t = 0; t < THREADS; ++t) { - genome_numbers[t].add(genome_nr); - } - num_shared_mapping[genome_nr] = new AtomicLong(0); - num_unique_mapping[genome_nr] = new AtomicLong(0); - num_unmapped[genome_nr] = new AtomicLong(0); - } else { - Pantools.logger.warn("Genome {} is not found in the database.", genome_nr); + for (genome_nr = 1; genome_nr <= GENOME_DB.num_genomes; genome_nr++) { + if (!skip_list.contains(genome_nr)) { + for (t = 0; t < THREADS; ++t) { + genome_numbers[t].add(genome_nr); } + num_shared_mapping[genome_nr] = new AtomicLong(0); + num_unique_mapping[genome_nr] = new AtomicLong(0); + num_unmapped[genome_nr] = new AtomicLong(0); + } else { + Pantools.logger.warn("Genome {} is not found in the database.", genome_nr); } } if (ALIGNMENT_MODE < 0) { // when competitive mapping diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/MultipleSequenceAlignment.java b/src/main/java/nl/wur/bif/pantools/pangenome/MultipleSequenceAlignment.java index 5362815ef6c7a3474f1086a657c6a428b2fc74d6..1f63e81b25524ca1336b340953f26765e21b9719 100644 --- a/src/main/java/nl/wur/bif/pantools/pangenome/MultipleSequenceAlignment.java +++ b/src/main/java/nl/wur/bif/pantools/pangenome/MultipleSequenceAlignment.java @@ -33,6 +33,7 @@ public class MultipleSequenceAlignment { private final String alignmentTypeCapital; // can be "Protein", "Nucleotide" or "Variants" private final boolean pavs; private final List<Long> hmGroups; + private final Set<String> excludedGroups; private ArrayList<Node> hmNodeList; private ArrayList<String> msaNames; @@ -53,6 +54,7 @@ public class MultipleSequenceAlignment { this.alignVariants = alignVariants; this.pavs = pavs; this.hmGroups = hmGroups; + this.excludedGroups = new HashSet<>(); // set alignmentTypeShort if (alignProtein) { @@ -2087,7 +2089,6 @@ public class MultipleSequenceAlignment { int groupCounter = 0; ArrayList<String> suitableGroups = new ArrayList<>(); - ArrayList<String> excludedGroups = new ArrayList<>(); for (String groupId : msaNames) { groupCounter ++; @@ -2141,7 +2142,7 @@ public class MultipleSequenceAlignment { * @param excludedGroups alignments not suitable for trimming * @return true if trimmed, false if not */ - private boolean checkIfGroupIsTrimmed(String groupId, Path inPath, ArrayList<String> suitableGroups, ArrayList<String> excludedGroups) { + private boolean checkIfGroupIsTrimmed(String groupId, Path inPath, ArrayList<String> suitableGroups, Set<String> excludedGroups) { boolean pass2 = check_if_file_exists(inPath.resolve("not_trimmed").toString()); if (pass2) { excludedGroups.add(groupId); // long + str to convert to a string @@ -2448,6 +2449,16 @@ public class MultipleSequenceAlignment { return hmNodeList; } + + /** + * Checks if a homology group is excluded based on trimming + * @param group homology group node identifier + * @return boolean determining if the group is excluded + */ + public boolean isExcluded(long group) { + return excludedGroups.contains(Long.toString(group)); + } + /** * Gives the private variable msaNames * @return private variable msaNames diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/Phylogeny.java b/src/main/java/nl/wur/bif/pantools/pangenome/Phylogeny.java index 59e9dd386421e5d933570d68975dfbead545ff79..7c1730dd060fff775edd37047bd2b6fab3b4e75e 100644 --- a/src/main/java/nl/wur/bif/pantools/pangenome/Phylogeny.java +++ b/src/main/java/nl/wur/bif/pantools/pangenome/Phylogeny.java @@ -2635,9 +2635,11 @@ public class Phylogeny { for (Node hm_node : hmNodeList) { group_counter++; Pantools.logger.debug("Reading gene tree: {}/{}.", group_counter, total_groups); + if (msa.isExcluded(hm_node.getId())) continue; gene_tree_file = outDirMsa.resolve(hm_node.getId() + "") .resolve("output") .resolve(alignmentTypeShort + trimming + ".newick"); + gene_tree = read_newick_file(gene_tree_file); Pantools.logger.debug("Gene tree {} has polytomies: {}", gene_tree_file, hasPolytomies(gene_tree)); diff --git a/src/main/java/nl/wur/bif/pantools/pantools/Pantools.java b/src/main/java/nl/wur/bif/pantools/pantools/Pantools.java index d55708eeee6cc068766895cf2cde8720e36988e6..fd09b87d4f2bfa052bb8f0009b288d3e4caae389 100644 --- a/src/main/java/nl/wur/bif/pantools/pantools/Pantools.java +++ b/src/main/java/nl/wur/bif/pantools/pantools/Pantools.java @@ -52,7 +52,7 @@ import static picocli.CommandLine.ScopeType.INHERIT; * Implements the main function and declares shared variables. Initializes command line logic. * Contains the --help and --version commands of the main function. * - * @author Roel van Esch, Bioinformatics Group, Wageningen University, the Netherlands. + * @author Robin van Esch, Bioinformatics Group, Wageningen University, the Netherlands. */ @Command(name = "pantools", @@ -334,7 +334,7 @@ public class Pantools { * Custom IHelpSectionRenderer class to use in 'pantools --help'. * Orders the pantools commands by category. * - * @author Roel van Esch + * @author Robin van Esch */ static class CommandGroupRenderer implements IHelpSectionRenderer { @@ -432,7 +432,7 @@ public class Pantools { * Custom IVersionProvider class to use in 'pantools --version'. * Gets the most up to date version information from git. * - * @author Roel van Esch TODO: add authors of getVersionInformation() function (originally in utils/Utils.java) + * @author Robin van Esch TODO: add authors of getVersionInformation() function (originally in utils/Utils.java) */ protected static class GitVersionProvider implements IVersionProvider { diff --git a/src/main/java/nl/wur/bif/pantools/utils/BeanUtils.java b/src/main/java/nl/wur/bif/pantools/utils/BeanUtils.java index 837e81bbe610b7bbde7e0f674af5e1f337f59077..492ea74a456fe04bbce02a9160f5f412f915bb74 100644 --- a/src/main/java/nl/wur/bif/pantools/utils/BeanUtils.java +++ b/src/main/java/nl/wur/bif/pantools/utils/BeanUtils.java @@ -16,7 +16,7 @@ import java.util.Set; /** * Contains functions for jakarta bean hibernate validation. * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ public final class BeanUtils { diff --git a/src/main/java/nl/wur/bif/pantools/utils/ConsoleUtils.java b/src/main/java/nl/wur/bif/pantools/utils/ConsoleUtils.java index c8443617257a25cdff6d590a6e20235cdfc1c19d..90f2cd73e3ce59d3254f02b4b80897c1a238b507 100644 --- a/src/main/java/nl/wur/bif/pantools/utils/ConsoleUtils.java +++ b/src/main/java/nl/wur/bif/pantools/utils/ConsoleUtils.java @@ -9,7 +9,7 @@ import static java.util.regex.Pattern.CASE_INSENSITIVE; /** * Class for console input functions using Scanner * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ public final class ConsoleUtils { diff --git a/src/main/java/nl/wur/bif/pantools/utils/Globals.java b/src/main/java/nl/wur/bif/pantools/utils/Globals.java index a72d1221c6d39d9db0d667a5d9953838d834b54b..d22e7aafae4c2485d60b22c93938dc04fde6b8ce 100644 --- a/src/main/java/nl/wur/bif/pantools/utils/Globals.java +++ b/src/main/java/nl/wur/bif/pantools/utils/Globals.java @@ -77,7 +77,6 @@ public final class Globals { public static String PATH_TO_THE_PROTEOMES_FILE; public static String PATH_TO_THE_ANNOTATIONS_FILE; public static String PATH_TO_THE_REGIONS_FILE; - public static String PATH_TO_THE_GENOME_NUMBERS_FILE; public static String RAW_ABUNDANCE_FILE = ""; public static String PATH_TO_THE_FIRST_SRA; public static String PATH_TO_THE_SECOND_SRA; diff --git a/src/main/java/nl/wur/bif/pantools/utils/StringUtils.java b/src/main/java/nl/wur/bif/pantools/utils/StringUtils.java index c1b4a6d64942091347031c9f48b7f193fe0d349c..9627bff837e36e62a5ebe8a7cf20bcd9538cbddf 100644 --- a/src/main/java/nl/wur/bif/pantools/utils/StringUtils.java +++ b/src/main/java/nl/wur/bif/pantools/utils/StringUtils.java @@ -9,7 +9,7 @@ import java.util.stream.IntStream; /** * Class with string parsing utility functions * - * @author Roel van Esch, Wageningen University, the Netherlands. + * @author Robin van Esch, Wageningen University, the Netherlands. */ public final class StringUtils { diff --git a/src/main/java/nl/wur/bif/pantools/utils/Utils.java b/src/main/java/nl/wur/bif/pantools/utils/Utils.java index 4714bec40287a3e683e33271b1684cc606c80d4e..713cf56b25c12cdfab3bb5946e562b46cf294e88 100644 --- a/src/main/java/nl/wur/bif/pantools/utils/Utils.java +++ b/src/main/java/nl/wur/bif/pantools/utils/Utils.java @@ -1285,6 +1285,7 @@ public final class Utils { try (BufferedReader br = new BufferedReader(new FileReader(hmFile.toFile()))) { String line; while ((line = br.readLine()) != null) { + if (line.startsWith("#")) continue; String[] hmGroup = line.split(","); for (String s : hmGroup) { hmGroups.add(Long.parseLong(s)); diff --git a/src/test/java/nl/wur/bif/pantools/cli/validation/ConstraintsTest.java b/src/test/java/nl/wur/bif/pantools/cli/validation/ConstraintsTest.java index 9ff2909ac57b586725c336409213962c6f7bc16b..e699ba0b7f05ca676785126747fd9a32d9f3ec90 100644 --- a/src/test/java/nl/wur/bif/pantools/cli/validation/ConstraintsTest.java +++ b/src/test/java/nl/wur/bif/pantools/cli/validation/ConstraintsTest.java @@ -21,7 +21,7 @@ import static org.junit.jupiter.api.Assertions.*; /** * Unit tests for all constraints within the Constraints class. * - * @author Roel van Esch, Bioinformatics group, Wageningen University, the Netherlands + * @author Robin van Esch, Bioinformatics group, Wageningen University, the Netherlands */ class ConstraintsTest { diff --git a/src/test/java/nl/wur/bif/pantools/pangenome/ProteomeLayerTest.java b/src/test/java/nl/wur/bif/pantools/pangenome/ProteomeLayerTest.java index b656f4841aa1629589a56c987ab30f4fc3cfd96b..ec5d0bc67d5d25a3fd58f6cfbb9f1996dcd1b9c7 100644 --- a/src/test/java/nl/wur/bif/pantools/pangenome/ProteomeLayerTest.java +++ b/src/test/java/nl/wur/bif/pantools/pangenome/ProteomeLayerTest.java @@ -28,7 +28,7 @@ import static org.junit.jupiter.api.Assertions.*; /** * Unit tests for all functions within the ProteomeLayer class. * - * @author Roel van Esch, Bioinformatics group, Wageningen University, the Netherlands + * @author Robin van Esch, Bioinformatics group, Wageningen University, the Netherlands */ public class ProteomeLayerTest { diff --git a/tests/Snakefile b/tests/Snakefile index e5a1b08f5c67389d0418111db438960141f945fc..28a202f071406001960eca0cca00d5242d2ea6f0 100644 --- a/tests/Snakefile +++ b/tests/Snakefile @@ -9,12 +9,12 @@ It will check: To execute, specify a configuration file with the data set, e.g.: -snakemake --cores 1 --configfile yeast-1.yaml +snakemake --cores 1 --configfile yeast.yaml The pipeline will package the local version into a .jar, as well as a reference -version (a git revision) specified in shared.yaml. It will download the dataset +version (a git revision) specified in shared.yaml. It will use the dataset referenced by the configuration file passed in by the user with --configfile -(e.g. yeast-1.yaml, yeast-4.yaml), and download and untar the dataset. +(e.g. yeast.yaml, etc.). A pangenome will be built with `build_pangenome` for the local and reference versions each. Pangenomes will be exported with `export_pangenome`, the @@ -125,68 +125,42 @@ rule export_pangenome: --sequence-node-anchors-file {output.sequence_node_anchors} """ -rule download_dataset: - """Download the data set with the provided name.""" - # TODO: URL should be changed once data is uploaded - # TODO: md5sum check? - # TODO: split genomes.txt generation from downloading data set - # TODO: standardize data set formats, file names, etc. +rule list_genomes: + """Lists the input genomes.""" + input: + genomes=config["dataset"]["genomes"], output: genome_list="input/data/{dataset_name}/genomes/genomes.txt", - fastq1="input/data/{dataset_name}/reads/1.fastq", - fastq2="input/data/{dataset_name}/reads/2.fastq" - params: - url=config["dataset"]["url"] shell: """ - mkdir -p "input/data/{wildcards.dataset_name}/" - - # TODO: for some reason a streaming untar does not seem to work (anymore) - curl -o dataset.tar --silent {params.url} - tar xf dataset.tar -C input/data/{wildcards.dataset_name}/ - rm dataset.tar - - find input/data/{wildcards.dataset_name}/genomes/ -iname '*.fasta' | sort > {output.genome_list} - - mv input/data/{wildcards.dataset_name}/reads/*_1.fastq {output.fastq1} - mv input/data/{wildcards.dataset_name}/reads/*_2.fastq {output.fastq2} + ls {input.genomes} | sort > {output.genome_list} """ rule map_reads: """Map reads against a a database, outputs a SAM file per genome.""" input: - fastq1="input/data/{dataset_name}/reads/1.fastq", - fastq2="input/data/{dataset_name}/reads/2.fastq", - genome_list="input/data/{dataset_name}/genomes/genomes.txt", + fastq1=config["dataset"]["reads"]["fastq1"], + fastq2=config["dataset"]["reads"]["fastq2"], database="output/databases/{dataset_name}/{version}/", jar="jars/pantools/target/pantools-{version}.jar" output: sam_files=expand( "output/alignments/{{dataset_name}}/{{version}}/sams/pantools_{genome}.sam", genome=range(1, len(config["dataset"]["genomes"]) + 1) - ), - genome_numbers_file="output/alignments/{dataset_name}/{version}/genome-numbers.txt" - params: - num_genomes=len(config["dataset"]["genomes"]) + ) shell: """ - mkdir -p $(dirname {output.genome_numbers_file}) - - seq 1 {params.num_genomes} > {output.genome_numbers_file} - {java} \ -jar {input.jar} \ map \ --threads 1 \ {input.database} \ - {output.genome_numbers_file} \ {input.fastq1} \ {input.fastq2} \ --gap-open -20 \ --gap-extension -3 \ --out-format SAM \ - --output $(dirname {output.sam_files[0]}) \ - # --reference 1-$(wc -l <{input.genome_list} | sed 's/ //g') + --output $(dirname {output.sam_files[0]}) """ rule strip_sam_pg_header_line: diff --git a/tests/shared.yaml b/tests/shared.yaml index fe62639baa6e31abe0f4b13d19cd24465b95bc5e..c32a5ae8adbd6e3e992469c1f3c948c1f393f7e3 100644 --- a/tests/shared.yaml +++ b/tests/shared.yaml @@ -2,7 +2,7 @@ tools: pantools: repository_url: https://git.wur.nl/bioinformatics/pantools.git - reference_version: 8da5bd05 + reference_version: 0c4a3485 java_arguments: - -Xmx8g maven: diff --git a/tests/yeast-1.yaml b/tests/yeast-1.yaml deleted file mode 100644 index 0eebe780084daa5fc2c324df35a7a85cef609534..0000000000000000000000000000000000000000 --- a/tests/yeast-1.yaml +++ /dev/null @@ -1,8 +0,0 @@ -dataset: - name: "yeast-1" - url: "https://www.bioinformatics.nl/pangenomics/data/ci/yeast-1.tar.gz" - genomes: - - GCF_000146045.2_R64_genomic.fasta - reads: - fastq1: 1_MSv3_1.fastq - fastq2: 1_MSv3_2.fastq diff --git a/tests/yeast-4.yaml b/tests/yeast-4.yaml deleted file mode 100644 index 815abcef0a45e14b20c26728fc2f25770bb66c11..0000000000000000000000000000000000000000 --- a/tests/yeast-4.yaml +++ /dev/null @@ -1,11 +0,0 @@ -dataset: - name: "yeast-4" - url: "https://www.bioinformatics.nl/pangenomics/data/ci/yeast-4.tar.gz" - genomes: - - GCA_000167035.1_ASM16703v1_genomic.fasta - - GCA_000256765.1_Saccharomyces_kudriavzevii_strain_FM1066_v1.0_genomic.fasta - - GCF_000146045.2_R64_genomic.fasta - - GCF_001298625.1_SEUB3.0_genomic.fasta - reads: - fastq1: 1_MSv3_1.fastq - fastq2: 1_MSv3_2.fastq diff --git a/tests/yeast.yaml b/tests/yeast.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1dedfdae4341334b7bda377065caa9c0fdb9b8eb --- /dev/null +++ b/tests/yeast.yaml @@ -0,0 +1,11 @@ +dataset: + name: "yeast" + genomes: + - yeast_tiny_pangenome/genomes/273614_chrI.fna + - yeast_tiny_pangenome/genomes/BY_chrI.fna + - yeast_tiny_pangenome/genomes/CBS2888_chrI.fna + - yeast_tiny_pangenome/genomes/S288C_chrI.fna + - yeast_tiny_pangenome/genomes/Y10_chrI.fna + reads: + fastq1: yeast_tiny_pangenome/reads/SRR800844_chrI_1.fq.gz + fastq2: yeast_tiny_pangenome/reads/SRR800844_chrI_2.fq.gz