Commit 5af85c81 authored by Workum, Dirk-Jan van's avatar Workum, Dirk-Jan van
Browse files

merge release_v4.0.0 into develop and solve merge conflicts

parents 5e6595f7 149a96db
Pipeline #57810 passed with stage
in 51 seconds
......@@ -8,9 +8,11 @@ All notable changes to Pantools will be documented in this file.
### Changed
### Fixed
## [3.5] - TODO: add date for release
## [4.0.0] - TODO: add date for release
### Added
- CI/CD pipeline that for now only runs `mvn test` (merge request !41).
......@@ -62,8 +64,10 @@ All notable changes to Pantools will be documented in this file.
- Replaced method for Fisher exact test to correctly deal with larger values (merge request !66).
- `go_enrichment` no longer crashes when antiSMASH geneclusters were added to the pangenome (merge request !78).
- `add_annotations` now handles co-features correctly (merge request !79).
- Distinguish between homology groups as file or as list for `msa`, `core_snp_tree` and `consensus_tree` (merge request !86).
- `build_panproteome` no longer creates inconsistent 'header' and 'protein_ID' properties (merge request !89).
## [3.4] - 2021-05-04
## [3.4.0] - 2022-05-04
### Added
- Version and commit ID are reported when PanTools is initialized.
......
......@@ -53,18 +53,18 @@ Note: tests are broken at the moment, which is why we're skipping the Maven test
-->
`mvn package` will generate two jar files in the `target/` directory.
The standalone jar file is named `pantools-<VERSION>.jar`, with `<VERSION>` being the PanTools version you have checked out.
To run it, use (we'll take version `3.4` as an example):
The standalone jar file is named `pantools-<version>.jar`, with `<version>` being the PanTools version you have checked out.
To run it, use:
```bash
java -jar target/pantools-3.4.jar -h
java -jar target/pantools-<version>.jar -h
```
Finally, we recommend making use of an alias to make running PanTools easier.
For example, add the following to your `~/.bashrc` file:
```bash
alias pantools="java -jar <PATH_TO_PANTOOLS>/target/pantools-3.4.jar"
alias pantools="java -jar <path/to/pantools>/target/pantools-<version>.jar"
```
where `<PATH_TO_PANTOOLS>` is the path to the directory where you checked out PanTools.
where `<path/to/pantools>` is the path to the directory where you checked out PanTools.
Then, you can run PanTools by simply typing `pantools` in your terminal.
## Contact
......
......@@ -6,8 +6,8 @@ project = 'PanTools'
copyright = '2016, the PanTools team'
author = 'Sandra Smit'
release = '0.1'
version = '0.1.0'
# Dotted MAJOR.MINOR.PATCH form; `version` is substituted into the docs as
# |ProjectVersion| (e.g. in the jar file name), so it must not contain a comma.
release = '4.0.0'
version = '4.0.0'
# -- General configuration
......@@ -18,7 +18,8 @@ extensions = [
'sphinx.ext.autosummary',
'sphinx.ext.intersphinx',
'sphinx.ext.autosectionlabel',
'sphinx.ext.mathjax'
'sphinx.ext.mathjax',
'extensions.substitutioncodeblock'
]
intersphinx_mapping = {
......@@ -51,6 +52,9 @@ mathjax3_config = {'chtml': {'displayAlign': 'left',
# CSS files
html_css_files = ['css/custom.css', 'css/s5defs-roles.css']
# Code substitutions
substitutions = [('|ProjectVersion|', version)]
# Prolog
rst_prolog = """
.. include:: <s5defs.txt>
......@@ -60,4 +64,11 @@ rst_prolog = """
<br>
"""
\ No newline at end of file
"""
# Epilog
rst_epilog = """
.. |ProjectVersion| replace:: {versionnum}
""".format(
versionnum = version,
)
\ No newline at end of file
......@@ -784,7 +784,7 @@ database.
Functional databases
""""""""""""""""""""
Database versions in v3.4 repository
Database versions in v\ |ProjectVersion| repository
.. list-table::
:widths: 25 25 50
......@@ -1078,7 +1078,7 @@ Options
.. list-table::
:widths: 30 70
* - ```--annotations-file``/``-A``
* - ``--annotations-file``/``-A``
- A text file with the identifiers of annotations to be included,
each on a separate line. The most recent annotation is selected for
genomes without an identifier.
......
"""
Custom Sphinx extension for code blocks allowing substitution.
"""
from typing import List
from sphinx.application import Sphinx
from sphinx.directives.code import CodeBlock
class SubstitutionCodeBlock(CodeBlock):  # type: ignore
    """
    Code block directive that substitutes placeholders before rendering.

    Behaves like ``CodeBlock``, except that every ``(placeholder, value)``
    pair from the ``substitutions`` config value is applied to each content
    line first.
    """

    def run(self) -> List:
        """
        Replace the configured placeholders and render the code block.

        :return: the nodes produced by ``CodeBlock.run``.
        """
        app = self.state.document.settings.env.app
        new_content = []
        for item in self.content:
            for original, replacement in app.config.substitutions:
                # Coerce to str so a non-string config value (e.g. a numeric
                # version) does not make str.replace raise a TypeError.
                item = item.replace(original, str(replacement))
            new_content.append(item)
        self.content = new_content
        return list(CodeBlock.run(self))
def setup(app: Sphinx) -> None:
    """
    Register the ``substitutions`` config value and the custom directive.
    """
    # Substitution happens while documents are parsed, so a changed value must
    # trigger a re-read of the sources ('env'), not only a re-write of the HTML.
    app.add_config_value('substitutions', [], 'env')
    app.add_directive('substitution-code-block', SubstitutionCodeBlock)
\ No newline at end of file
PanTools version 3.5
PanTools version |ProjectVersion|
=================================
PanTools is a toolkit for comparative analysis of a large number of
......@@ -65,9 +65,9 @@ Add the path to the java archive of PanTools, located in the
*pantools/target* subdirectory, to the OS path environment variable.
Then run PanTools from the command line by:
.. code:: bash
.. substitution-code-block:: bash
$ java <JVM options> -jar pantools-3.4.jar <subcommand> <arguments>
$ java <JVM options> -jar pantools-|ProjectVersion|.jar <subcommand> <arguments>
| **Useful JVM options**
| - **-server** : To optimize JIT compilations for higher performance
......
......@@ -12,19 +12,17 @@ For PanTools developers:
Download PanTools
-----------------
Clone PanTools from our repository and checkout to the 3.4 release branch.
Clone PanTools v\ |ProjectVersion| from our repository.
.. code:: bash
.. substitution-code-block:: bash
$ git clone https://git.wur.nl/bioinformatics/pantools
$ cd pantools
$ git checkout pantools_v3.4
$ git clone --branch v|ProjectVersion| https://git.wur.nl/bioinformatics/pantools
Test if PanTools is executable.
.. code:: bash
.. substitution-code-block:: bash
$ java -jar target/pantools-3.4.jar
$ java -jar target/pantools-|ProjectVersion|.jar
If the help page does not appear this (likely) means you don't have a
properly working Java version 8. Java is included in the PanTools conda
......@@ -41,16 +39,16 @@ command. Always include the **full** path to PanTools' .jar file.
If Java is on your $PATH:
.. code:: bash
.. substitution-code-block:: bash
$ echo "alias pantools='java -Xms20g -Xmx50g -jar /YOUR_FULL_PATH/pantools/target/pantools-3.4.jar'" >> ~/.bashrc
$ echo "alias pantools='java -Xms20g -Xmx50g -jar /YOUR_FULL_PATH/pantools/target/pantools-|ProjectVersion|.jar'" >> ~/.bashrc
If Java is not on your $PATH, include the **full** path to Java in the
alias. Replace both occurrences of 'YOUR_PATH' with the correct directory structure.
.. code:: bash
.. substitution-code-block:: bash
$ echo "alias pantools='/YOUR_PATH/jdk1.8.0_161/bin/java -Xms20g -Xmx50g -jar /YOUR_PATH/pantools/target/pantools-3.4.jar'" >> ~/.bashrc
$ echo "alias pantools='/YOUR_PATH/jdk1.8.0_161/bin/java -Xms20g -Xmx50g -jar /YOUR_PATH/pantools/target/pantools-|ProjectVersion|.jar'" >> ~/.bashrc
Source your ~/.bashrc and test if the alias works.
......
......@@ -30,7 +30,8 @@ created with **FastTree**.
from the list above requires use of the ``--method`` argument. For
aligning multiple homology groups, please use
``--method=multiple-groups`` with the homology groups specified in a
csv file on a single line (can be added with ``--homology-file=/path/to/hm.csv``.
csv file on a single line (can be added with
``--homology-file=/path/to/hm.csv`` or ``--homology-groups=2,3,4,4``).
For aligning regions, please use ``--method=regions`` with a regions
file that is added with ``--regions-file=/path/to/rf.txt``. For aligning
sequences based on a functional domain, please use
......@@ -99,7 +100,10 @@ Options
- Exclude a selection of genomes.
* - ``--homology-file``/``-H``
- A text file with homology group node identifiers, separated by a comma.
Default is all homology groups.
Default is all homology groups. (Mutually exclusive with ``--homology-groups``.)
* - ``--homology-groups``/``-G``
- A comma separated list of homology group node identifiers.
Default is all homology groups. (Mutually exclusive with ``--homology-file``.)
* - ``--regions-file``/``-R``
- A text file containing genome locations with on each line: a genome
number, sequence number, begin and end position, separated by a space.
......
......@@ -18,7 +18,7 @@ All functions produce tree files in Newick format that can be visualized
with iTOL or any other phylogenetic tree visualization software.
- :ref:`phylogeny:rename phylogeny`
- :ref:`phylogeny:reroot phylogeny`
- :ref:`phylogeny:root phylogeny`
- :ref:`phylogeny:create tree template`
--------------
......@@ -79,6 +79,12 @@ Options
- A file with homology group node identifiers of single copy groups.
Default is single_copy_orthologs.csv, generated in the previous
:ref:`gene_classification <characterize:gene classification>` run.
(Mutually exclusive with ``--homology-groups``.)
* - ``--homology-groups``/``-G``
- A comma separated list of homology group node identifiers of single
copy groups. Default is single_copy_orthologs.csv, generated in
the previous :ref:`gene_classification <characterize:gene classification>` run.
(Mutually exclusive with ``--homology-file``.)
* - ``--protein``
- Use proteins instead of nucleotide sequences.
* - ``--phenotype``/``-p``
......@@ -210,9 +216,12 @@ Options
- Number of parallel working threads, default is the number of cores
or 8, whichever is lower.
* - ``--homology-file``/``-H``
- A file with homology group node identifiers. Default is
all_homology_groups.csv, generated in the previous
:ref:`gene_classification <characterize:gene classification>` run.
- A file with homology group node identifiers. Default is all
homology groups. (Mutually exclusive with ``--homology-groups``.)
* - ``--homology-groups``/``-G``
- A comma separated list of homology group node identifiers. Default
is all homology groups. (Mutually exclusive with
``--homology-file``.)
* - ``--polytomies``
- Allow polytomies for ASTRAL-PRO.
......
......@@ -2,7 +2,7 @@
go.basic.obo;GO;addons;http://purl.obolibrary.org/obo/go/go-basic.obo
gene_ontology.txt;Pfam;addons;ftp://ftp.ebi.ac.uk/pub/databases/Pfam/releases//Pfam35.0/database_files/gene_ontology.txt.gz
Pfam-A.clans.tsv;Pfam;addons;ftp://ftp.ebi.ac.uk/pub/databases/Pfam/releases//Pfam35.0/Pfam-A.clans.tsv.gz
interpro.xml;InterPro;addons;https://ftp.ebi.ac.uk/pub/databases/interpro/interpro.xml.gz
interpro.xml;InterPro;addons;https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/interpro.xml.gz
TIGRFAMS_GO_LINK;TIGRFAM;addons/tigrfam;https://ftp.ncbi.nlm.nih.gov/hmm/TIGRFAMs/release_15.0/TIGRFAMS_GO_LINK
TIGRFAMS_ROLE_LINK;TIGRFAM;addons/tigrfam;https://ftp.ncbi.nlm.nih.gov/hmm/TIGRFAMs/release_15.0/TIGRFAMS_ROLE_LINK
TIGR_ROLE_NAMES;TIGRFAM;addons/tigrfam;https://ftp.ncbi.nlm.nih.gov/hmm/TIGRFAMs/release_15.0/TIGR_ROLE_NAMES
......
......@@ -21,7 +21,7 @@ or via wget.
We assume a PanTools alias was set during the
:ref:`installation <install:set pantools alias>`. This allows PanTools
to be executed with ``pantools`` rather than
``pantools/target/pantools-3.4.jar``. If you don’t have an alias, either
``pantools/target/pantools-``\ |ProjectVersion|\ ``.jar``. If you don’t have an alias, either
set one or replace the pantools command with the full path to the .jar
file in the tutorials.
......
......@@ -18,7 +18,7 @@ the following commands.
We assume a PanTools alias was set during the
:ref:`installation <install:set pantools alias>`. This allows PanTools
to be executed with ``pantools`` rather than
``pantools/target/pantools-3.4.jar``. If you don’t have an alias, either
``pantools/target/pantools-``\ |ProjectVersion|\ ``.jar``. If you don’t have an alias, either
set one or replace the pantools command with the full path to the .jar
file in the tutorials.
......
......@@ -6,7 +6,7 @@
<groupId>nl.wur.bif</groupId>
<artifactId>pantools</artifactId>
<version>3.4</version>
<version>4.0.0</version>
<properties>
<maven.compiler.source>8</maven.compiler.source>
......@@ -251,4 +251,4 @@
</plugin>
</plugins>
</build>
</project>
\ No newline at end of file
</project>
package nl.wur.bif.pantools.cli;
import nl.wur.bif.pantools.cli.mixins.SelectHmGroups;
import nl.wur.bif.pantools.cli.mixins.ThreadNumber;
import nl.wur.bif.pantools.cli.validation.BeanValidation;
import nl.wur.bif.pantools.pantools.Pantools;
import picocli.CommandLine.Model.CommandSpec;
import java.nio.file.Path;
import java.io.IOException;
import java.util.concurrent.Callable;
import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
import static nl.wur.bif.pantools.utils.Globals.*;
import static picocli.CommandLine.*;
......@@ -23,15 +23,12 @@ public class ConsensusTree implements Callable<Integer> {
@Spec CommandSpec spec;
@Mixin private ThreadNumber threadNumber;
@ArgGroup private SelectHmGroups selectHmGroups = new SelectHmGroups(); //needs to be initialized to prevent NullPointerException
@ParentCommand
@GraphDatabase
private Pantools pantools;
@Option(names = {"-H", "--homology-file"})
@InputFile(message = "{file.homology}")
Path homologyFile;
@Option(names = {"-l", "--log"}, hidden = true)
boolean log;
......@@ -39,22 +36,18 @@ public class ConsensusTree implements Callable<Integer> {
boolean polytomies;
@Override
public Integer call() {
public Integer call() throws IOException {
pantools.createLogger(spec);
new BeanValidation().argValidation(spec, this, threadNumber);
new BeanValidation().argValidation(spec, this, threadNumber, selectHmGroups);
pantools.setPangenomeGraph();
setGlobalParameters(); //TODO: use local parameters instead
phylogeny.consensus_tree();
phylogeny.consensus_tree(selectHmGroups.getHomologyGroups());
return 0;
}
private void setGlobalParameters() {
if (homologyFile != null) INPUT_FILE = homologyFile.toString();
THREADS = threadNumber.getnThreads();
LOG = log;
ALLOW_POLYTOMIES = polytomies;
}
}
......@@ -2,17 +2,17 @@ package nl.wur.bif.pantools.cli;
import jakarta.validation.constraints.Pattern;
import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
import nl.wur.bif.pantools.cli.mixins.SelectHmGroups;
import nl.wur.bif.pantools.cli.mixins.ThreadNumber;
import nl.wur.bif.pantools.cli.validation.BeanValidation;
import nl.wur.bif.pantools.pantools.Pantools;
import picocli.CommandLine.Model.CommandSpec;
import java.nio.file.Path;
import java.io.IOException;
import java.util.concurrent.Callable;
import static jakarta.validation.constraints.Pattern.Flag.CASE_INSENSITIVE;
import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
import static nl.wur.bif.pantools.utils.Globals.*;
import static picocli.CommandLine.*;
......@@ -27,15 +27,12 @@ public class CorePhylogeny implements Callable<Integer> {
@Spec CommandSpec spec;
@Mixin private ThreadNumber threadNumber;
@ArgGroup private SelectGenomes selectGenomes;
@ArgGroup private SelectHmGroups selectHmGroups = new SelectHmGroups(); //needs to be initialized to prevent NullPointerException
@ParentCommand
@GraphDatabase
private Pantools pantools;
@Option(names = {"-H", "--homology-file"})
@InputFile(message = "{file.homology}")
Path homologyFile;
@Option(names = "--protein")
boolean protein;
......@@ -47,24 +44,20 @@ public class CorePhylogeny implements Callable<Integer> {
String mode;
@Override
public Integer call() {
public Integer call() throws IOException {
pantools.createLogger(spec);
new BeanValidation().argValidation(spec, this, threadNumber, selectGenomes);
new BeanValidation().argValidation(spec, this, threadNumber, selectGenomes, selectHmGroups);
pantools.setPangenomeGraph();
setGlobalParameters(); //TODO: use local parameters instead
phylogeny.core_snp_tree();
phylogeny.core_snp_tree(selectHmGroups.getHomologyGroups());
return 0;
}
private void setGlobalParameters() {
setGenomeSelectionOptions(selectGenomes);
if (homologyFile != null) SELECTED_HMGROUPS = homologyFile.toString();
THREADS = threadNumber.getnThreads();
PHENOTYPE = phenotype;
CLUSTERING_METHOD = mode;
if (protein) Mode = "PROTEIN";
}
}
......@@ -2,12 +2,14 @@ package nl.wur.bif.pantools.cli;
import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
import nl.wur.bif.pantools.cli.validation.BeanValidation;
import nl.wur.bif.pantools.cli.mixins.SelectHmGroups;
import nl.wur.bif.pantools.pantools.Pantools;
import java.nio.file.Path;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.stream.Collectors;
import static nl.wur.bif.pantools.cli.validation.Constraints.*;
import static nl.wur.bif.pantools.cli.validation.Constraints.Patterns.Flag.CASE_INSENSITIVE;
......@@ -24,15 +26,12 @@ public class GroupInfo implements Callable<Integer> {
@Spec Model.CommandSpec spec;
@ArgGroup private SelectGenomes selectGenomes;
@ArgGroup(multiplicity = "1") private SelectHmGroups selectHmGroups;
@ParentCommand
@GraphDatabase
private Pantools pantools;
@Parameters(descriptionKey = "homology-file", index = "0+")
@InputFile(message = "{file.homology}")
Path homologyFile;
@Option(names = "--functions")
void setFunctions(String value) {
functions = Arrays.asList(value.split(","));
......@@ -51,10 +50,9 @@ public class GroupInfo implements Callable<Integer> {
boolean obtainNodes;
@Override
public Integer call() {
public Integer call() throws IOException {
pantools.createLogger(spec);
new BeanValidation().argValidation(spec, this, selectGenomes);
new BeanValidation().argValidation(spec, this, selectGenomes, selectHmGroups);
pantools.setPangenomeGraph();
setGlobalParameters(); //TODO: use local parameters instead
......@@ -62,9 +60,10 @@ public class GroupInfo implements Callable<Integer> {
return 0;
}
private void setGlobalParameters() {
private void setGlobalParameters() throws IOException {
setGenomeSelectionOptions(selectGenomes);
SELECTED_HMGROUPS = homologyFile.toString();
SELECTED_HMGROUPS = selectHmGroups.getHomologyGroups().stream().map(String::valueOf).collect(Collectors.joining(","));
System.out.println("SELECTED_HMGROUPS = " + SELECTED_HMGROUPS);
if (genes != null) SELECTED_NAME = genes.toString().replaceAll("[\\[\\]]", "");
if (functions != null) SELECTED_LABEL = functions.toString().replaceAll("[\\[\\]]", "");
......
package nl.wur.bif.pantools.cli;
import jakarta.validation.constraints.Max;
import jakarta.validation.constraints.Min;
import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
import nl.wur.bif.pantools.cli.validation.BeanValidation;
import nl.wur.bif.pantools.cli.mixins.SelectHmGroups;
import nl.wur.bif.pantools.pantools.Pantools;
import picocli.CommandLine.Command;
import picocli.CommandLine.Model.CommandSpec;
import java.nio.file.Path;
import jakarta.validation.constraints.Max;
import jakarta.validation.constraints.Min;
import nl.wur.bif.pantools.cli.validation.BeanValidation;
import java.io.IOException;
import java.util.concurrent.Callable;
import java.util.stream.Collectors;
import static nl.wur.bif.pantools.cli.validation.Constraints.GraphDatabase;
import static nl.wur.bif.pantools.cli.validation.Constraints.InputFile;
import static nl.wur.bif.pantools.utils.Globals.*;
import static picocli.CommandLine.*;
......@@ -25,15 +28,12 @@ public class LocateGenes implements Callable<Integer> {
@Spec CommandSpec spec;
@ArgGroup private SelectGenomes selectGenomes;
@ArgGroup(multiplicity = "1") private SelectHmGroups selectHmGroups;
@ParentCommand
@GraphDatabase
private Pantools pantools;
@Parameters(descriptionKey = "homology-file", index = "0+")
@InputFile(message = "{file.homology}")
Path homologyFile;
@Option(names = {"-p", "--phenotype"})
String phenotype;
......@@ -54,10 +54,9 @@ public class LocateGenes implements Callable<Integer> {
boolean ignoreDuplications;
@Override
public Integer call() {
public Integer call() throws IOException {
pantools.createLogger(spec);
new BeanValidation().argValidation(spec, this, selectGenomes);
new BeanValidation().argValidation(spec, this, selectGenomes, selectHmGroups);
pantools.setPangenomeGraph("pangenome");
setGlobalParameters(); //TODO: use local parameters instead
......@@ -65,9 +64,10 @@ public class LocateGenes implements Callable<Integer> {
return 0;
}
private void setGlobalParameters() {
private void setGlobalParameters() throws IOException {
setGenomeSelectionOptions(selectGenomes);
SELECTED_HMGROUPS = homologyFile.toString();
SELECTED_HMGROUPS = selectHmGroups.getHomologyGroups().stream().map(String::valueOf).collect(Collectors.joining(","));
System.out.println("SELECTED_HMGROUPS = " + SELECTED_HMGROUPS);
core_threshold = coreThreshold;
PHENOTYPE = phenotype;
if (ignoreDuplications) Mode = "IGNORE-COPIES";
......
......@@ -5,11 +5,13 @@ import jakarta.validation.constraints.Min;
import jakarta.validation.constraints.Pattern;
import nl.wur.bif.pantools.cli.mixins.SelectGenomes;
import nl.wur.bif.pantools.cli.mixins.ThreadNumber;
import nl.wur.bif.pantools.cli.mixins.SelectHmGroups;
import nl.wur.bif.pantools.cli.validation.BeanValidation;
import nl.wur.bif.pantools.pangenome.MultipleSequenceAlignment;
import nl.wur.bif.pantools.pantools.Pantools;
import picocli.CommandLine.Model.CommandSpec;
import java.io.IOException;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
......@@ -37,15 +39,12 @@ public class MSA implements Callable<Integer> {
@Spec CommandSpec spec;
@Mixin private ThreadNumber threadNumber;
@ArgGroup private SelectGenomes selectGenomes;
@ArgGroup private SelectHmGroups selectHmGroups = new SelectHmGroups(); //needs to be initialized to prevent NullPointerException
@ParentCommand
@GraphDatabase
private Pantools pantools;
@Option(names = {"-H", "--homology-file"})
@InputFile(message = "{file.homology}")
Path homologyFile;
@Option(names = {"-R", "--regions-file"})
@InputFile(message = "{file.regions}")
Path regionsFile;
......@@ -88,10 +87,9 @@ public class MSA implements Callable<Integer> {
boolean variants;
@Override
public Integer call() {
public Integer call() throws IOException {
pantools.createLogger(spec);
new BeanValidation().argValidation(spec, this, threadNumber, selectGenomes);
new BeanValidation().argValidation(spec, this, threadNumber, selectGenomes, selectHmGroups);
pantools.setPangenomeGraph();
method = method.replace("-", "_");