From ca65b981ea56ffbd5a6f0ae417263532f1b5cad7 Mon Sep 17 00:00:00 2001 From: "Esch, Roel van" <roel.vanesch@wur.nl> Date: Fri, 27 Jan 2023 16:05:19 +0000 Subject: [PATCH] storing scoring matrices as resource files --- CHANGELOG.md | 2 + docs/source/user_guide/construct.rst | 4 + docs/source/user_guide/phylogeny.rst | 8 +- .../BoundedLocalSequenceAlignment.java | 2170 +---------------- .../alignment/LocalSequenceAlignment.java | 878 +------ .../nl/wur/bif/pantools/cli/AddFunctions.java | 4 +- .../wur/bif/pantools/cli/ConsensusTree.java | 7 + .../wur/bif/pantools/cli/CorePhylogeny.java | 9 +- .../java/nl/wur/bif/pantools/cli/Group.java | 9 +- .../java/nl/wur/bif/pantools/cli/MSA.java | 3 +- .../wur/bif/pantools/cli/OptimalGrouping.java | 6 +- .../pantools/cli/validation/Constraints.java | 17 + .../validators/ScoringMatrixValidator.java | 28 + .../pantools/pangenome/Classification.java | 69 +- .../bif/pantools/pangenome/GenomeLayer.java | 4 +- .../wur/bif/pantools/pangenome/Phylogeny.java | 8 +- .../bif/pantools/pangenome/ProteomeLayer.java | 12 +- .../nl/wur/bif/pantools/utils/FileUtils.java | 46 +- .../nl/wur/bif/pantools/utils/Globals.java | 2 +- src/main/resources/Defaults.properties | 2 + src/main/resources/ErrorMessages.properties | 5 + src/main/resources/MessageBundle.properties | 5 +- src/main/resources/scoring-matrices/BLOSUM45 | 27 + src/main/resources/scoring-matrices/BLOSUM50 | 27 + src/main/resources/scoring-matrices/BLOSUM62 | 27 + src/main/resources/scoring-matrices/BLOSUM80 | 27 + src/main/resources/scoring-matrices/BLOSUM90 | 27 + src/main/resources/scoring-matrices/NUC.4.4 | 24 + src/main/resources/scoring-matrices/PAM250 | 27 + src/main/resources/scoring-matrices/PAM30 | 27 + src/main/resources/scoring-matrices/PAM70 | 27 + .../pantools/utils/LoadScoringMatrixTest.java | 39 + 32 files changed, 457 insertions(+), 3120 deletions(-) create mode 100644 src/main/java/nl/wur/bif/pantools/cli/validation/validators/ScoringMatrixValidator.java create mode 
100644 src/main/resources/scoring-matrices/BLOSUM45 create mode 100644 src/main/resources/scoring-matrices/BLOSUM50 create mode 100644 src/main/resources/scoring-matrices/BLOSUM62 create mode 100644 src/main/resources/scoring-matrices/BLOSUM80 create mode 100644 src/main/resources/scoring-matrices/BLOSUM90 create mode 100644 src/main/resources/scoring-matrices/NUC.4.4 create mode 100644 src/main/resources/scoring-matrices/PAM250 create mode 100644 src/main/resources/scoring-matrices/PAM30 create mode 100644 src/main/resources/scoring-matrices/PAM70 create mode 100644 src/test/java/nl/wur/bif/pantools/utils/LoadScoringMatrixTest.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c80d9b2a..47c57260f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,10 +5,12 @@ All notable changes to Pantools will be documented in this file. ### Added - Added `sphinx-lint` to both pre-commit hooks and CI/CD pipeline to ensure correctness documentation (!112 !116). - Option to add read group to alignment files produced by `pantools map` (!102). +- Added more BLOSUM and PAM protein scoring matrix options (!123). ### Changed - `add_functions` can now specify a directory where functional databases are stored (!117). - parameter `-v` is now required for `change_grouping` (!124). +- Scoring matrices are now stored in the resources directory as readable files (!123). ### Fixed - `msa` now works correctly with 'alt_id' properties of GO nodes (!118). diff --git a/docs/source/user_guide/construct.rst b/docs/source/user_guide/construct.rst index bdbd27cb5..b395a59f8 100644 --- a/docs/source/user_guide/construct.rst +++ b/docs/source/user_guide/construct.rst @@ -355,6 +355,8 @@ Options for genomes without an identifier. * - ``--longest`` - Only cluster protein sequences of the longest transcript per gene. + * - ``--scoring-matrix`` + - The scoring matrix used, default is BLOSUM62. * - ``--relaxation`` - The relaxation in homology calls. Should be in range [1-8], from strict to relaxed. 
This argument automatically sets @@ -503,6 +505,8 @@ Options compared to the previous clustering round. * - ``--longest`` - Only cluster protein sequences of the longest transcript per gene. + * - ``--scoring-matrix`` + - The scoring matrix used, default is BLOSUM62. * - ``--relaxation`` - Only consider a selection of relaxation settings (1-8 allowed). diff --git a/docs/source/user_guide/phylogeny.rst b/docs/source/user_guide/phylogeny.rst index 3d3d0e416..79d8be74c 100644 --- a/docs/source/user_guide/phylogeny.rst +++ b/docs/source/user_guide/phylogeny.rst @@ -92,6 +92,8 @@ Options * - ``--clustering-mode``/``-m`` - Maximum likelihood (--mode ML) or Neighbour joining (--mode NJ). Default is ML. + - A BLOSUM matrix to be used for the calculation of protein similarity. + Allowed values are 45, 50, 62, 80 and 90 (default: 62). Example commands ~~~~~~~~~~~~~~~~ @@ -224,8 +226,10 @@ Options homology groups. (Mutually exclusive with ``--homology-groups``.) * - ``--homology-groups``/``-G`` - A comma separated list of homology group node identifiers. Default - is all homology groups. (Mutually exclusive with - ``--homology-file``.) + is all homology groups. (Mutually exclusive with ``--homology-file``.) + * - ``--blosum`` + - A BLOSUM matrix to be used for the calculation of protein similarity. + Allowed values are 45, 50, 62, 80 and 90 (default: 62). * - ``--polytomies`` - Allow polytomies for ASTRAL-PRO. 
diff --git a/src/main/java/nl/wur/bif/pantools/alignment/BoundedLocalSequenceAlignment.java b/src/main/java/nl/wur/bif/pantools/alignment/BoundedLocalSequenceAlignment.java index a40d5c995..9e88e183e 100755 --- a/src/main/java/nl/wur/bif/pantools/alignment/BoundedLocalSequenceAlignment.java +++ b/src/main/java/nl/wur/bif/pantools/alignment/BoundedLocalSequenceAlignment.java @@ -1,5 +1,7 @@ package nl.wur.bif.pantools.alignment; +import nl.wur.bif.pantools.utils.FileUtils; + import java.util.Stack; /** @@ -32,7 +34,7 @@ public class BoundedLocalSequenceAlignment { private int deletions; private int insertions; private int BOUND; - private char TYPE; + private String TYPE; private int offset; private int range_len; private int mismatch_penalty, insertion_penalty; @@ -49,7 +51,7 @@ public class BoundedLocalSequenceAlignment { * @param clip The stringency of soft-clipping in the range [0..3] * @param type Type of the input sequences(N for nucleotide P for peptide). */ - public BoundedLocalSequenceAlignment(int gap_open, int gap_ext, int max_length, int band, int clip, char type) { + public BoundedLocalSequenceAlignment(int gap_open, int gap_ext, int max_length, int band, int clip, String type) { int i, j; seq1 = new StringBuilder(); seq2 = new StringBuilder(); @@ -89,12 +91,7 @@ public class BoundedLocalSequenceAlignment { left[0][j] = -1000; matrix[0][j] = 0; } - if (TYPE == 'N') - initialize_NUCC_matrix(); - else if (TYPE == 'P') - initialize_BLOSUM_matrix(62); - else - System.out.println("Aligner type should be N or P"); + match = FileUtils.loadScoringMatrix(TYPE); switch (CLIPPING_STRIGENCY) { case 1: mismatch_penalty = -1; @@ -110,2163 +107,6 @@ public class BoundedLocalSequenceAlignment { break; } } - - /** - * Initializes the NUCC.1 scoring matrix. 
- */ - public final void initialize_NUCC_matrix() { - match = new int[256][256]; - - match['A']['A'] = 5; - match['A']['T'] = -4; - match['A']['G'] = -4; - match['A']['C'] = -4; - match['A']['S'] = -4; - match['A']['W'] = 1; - match['A']['R'] = 1; - match['A']['Y'] = -4; - match['A']['K'] = -4; - match['A']['M'] = 1; - match['A']['B'] = -4; - match['A']['V'] = -1; - match['A']['H'] = -1; - match['A']['D'] = -1; - match['A']['N'] = -2; - match['T']['A'] = -4; - match['T']['T'] = 5; - match['T']['G'] = -4; - match['T']['C'] = -4; - match['T']['S'] = -4; - match['T']['W'] = 1; - match['T']['R'] = -4; - match['T']['Y'] = 1; - match['T']['K'] = 1; - match['T']['M'] = -4; - match['T']['B'] = -1; - match['T']['V'] = -4; - match['T']['H'] = -1; - match['T']['D'] = -1; - match['T']['N'] = -2; - match['G']['A'] = -4; - match['G']['T'] = -4; - match['G']['G'] = 5; - match['G']['C'] = -4; - match['G']['S'] = 1; - match['G']['W'] = -4; - match['G']['R'] = 1; - match['G']['Y'] = -4; - match['G']['K'] = 1; - match['G']['M'] = -4; - match['G']['B'] = -1; - match['G']['V'] = -1; - match['G']['H'] = -4; - match['G']['D'] = -1; - match['G']['N'] = -2; - match['C']['A'] = -4; - match['C']['T'] = -4; - match['C']['G'] = -4; - match['C']['C'] = 5; - match['C']['S'] = 1; - match['C']['W'] = -4; - match['C']['R'] = -4; - match['C']['Y'] = 1; - match['C']['K'] = -4; - match['C']['M'] = 1; - match['C']['B'] = -1; - match['C']['V'] = -1; - match['C']['H'] = -1; - match['C']['D'] = -4; - match['C']['N'] = -2; - match['S']['A'] = -4; - match['S']['T'] = -4; - match['S']['G'] = 1; - match['S']['C'] = 1; - match['S']['S'] = -1; - match['S']['W'] = -4; - match['S']['R'] = -2; - match['S']['Y'] = -2; - match['S']['K'] = -2; - match['S']['M'] = -2; - match['S']['B'] = -1; - match['S']['V'] = -1; - match['S']['H'] = -3; - match['S']['D'] = -3; - match['S']['N'] = -1; - match['W']['A'] = 1; - match['W']['T'] = 1; - match['W']['G'] = -4; - match['W']['C'] = -4; - match['W']['S'] = -4; - 
match['W']['W'] = -1; - match['W']['R'] = -2; - match['W']['Y'] = -2; - match['W']['K'] = -2; - match['W']['M'] = -2; - match['W']['B'] = -3; - match['W']['V'] = -3; - match['W']['H'] = -1; - match['W']['D'] = -1; - match['W']['N'] = -1; - match['R']['A'] = 1; - match['R']['T'] = -4; - match['R']['G'] = 1; - match['R']['C'] = -4; - match['R']['S'] = -2; - match['R']['W'] = -2; - match['R']['R'] = -1; - match['R']['Y'] = -4; - match['R']['K'] = -2; - match['R']['M'] = -2; - match['R']['B'] = -3; - match['R']['V'] = -1; - match['R']['H'] = -3; - match['R']['D'] = -1; - match['R']['N'] = -1; - match['Y']['A'] = -4; - match['Y']['T'] = 1; - match['Y']['G'] = -4; - match['Y']['C'] = 1; - match['Y']['S'] = -2; - match['Y']['W'] = -2; - match['Y']['R'] = -4; - match['Y']['Y'] = -1; - match['Y']['K'] = -2; - match['Y']['M'] = -2; - match['Y']['B'] = -1; - match['Y']['V'] = -3; - match['Y']['H'] = -1; - match['Y']['D'] = -3; - match['Y']['N'] = -1; - match['K']['A'] = -4; - match['K']['T'] = 1; - match['K']['G'] = 1; - match['K']['C'] = -4; - match['K']['S'] = -2; - match['K']['W'] = -2; - match['K']['R'] = -2; - match['K']['Y'] = -2; - match['K']['K'] = -1; - match['K']['M'] = -4; - match['K']['B'] = -1; - match['K']['V'] = -3; - match['K']['H'] = -3; - match['K']['D'] = -1; - match['K']['N'] = -1; - match['M']['A'] = 1; - match['M']['T'] = -4; - match['M']['G'] = -4; - match['M']['C'] = 1; - match['M']['S'] = -2; - match['M']['W'] = -2; - match['M']['R'] = -2; - match['M']['Y'] = -2; - match['M']['K'] = -4; - match['M']['M'] = -1; - match['M']['B'] = -3; - match['M']['V'] = -1; - match['M']['H'] = -1; - match['M']['D'] = -3; - match['M']['N'] = -1; - match['B']['A'] = -4; - match['B']['T'] = -1; - match['B']['G'] = -1; - match['B']['C'] = -1; - match['B']['S'] = -1; - match['B']['W'] = -3; - match['B']['R'] = -3; - match['B']['Y'] = -1; - match['B']['K'] = -1; - match['B']['M'] = -3; - match['B']['B'] = -1; - match['B']['V'] = -2; - match['B']['H'] = -2; - match['B']['D'] 
= -2; - match['B']['N'] = -1; - match['V']['A'] = -1; - match['V']['T'] = -4; - match['V']['G'] = -1; - match['V']['C'] = -1; - match['V']['S'] = -1; - match['V']['W'] = -3; - match['V']['R'] = -1; - match['V']['Y'] = -3; - match['V']['K'] = -3; - match['V']['M'] = -1; - match['V']['B'] = -2; - match['V']['V'] = -1; - match['V']['H'] = -2; - match['V']['D'] = -2; - match['V']['N'] = -1; - match['H']['A'] = -1; - match['H']['T'] = -1; - match['H']['G'] = -4; - match['H']['C'] = -1; - match['H']['S'] = -3; - match['H']['W'] = -1; - match['H']['R'] = -3; - match['H']['Y'] = -1; - match['H']['K'] = -3; - match['H']['M'] = -1; - match['H']['B'] = -2; - match['H']['V'] = -2; - match['H']['H'] = -1; - match['H']['D'] = -2; - match['H']['N'] = -1; - match['D']['A'] = -1; - match['D']['T'] = -1; - match['D']['G'] = -1; - match['D']['C'] = -4; - match['D']['S'] = -3; - match['D']['W'] = -1; - match['D']['R'] = -1; - match['D']['Y'] = -3; - match['D']['K'] = -1; - match['D']['M'] = -3; - match['D']['B'] = -2; - match['D']['V'] = -2; - match['D']['H'] = -2; - match['D']['D'] = -1; - match['D']['N'] = -1; - match['N']['A'] = -2; - match['N']['T'] = -2; - match['N']['G'] = -2; - match['N']['C'] = -2; - match['N']['S'] = -1; - match['N']['W'] = -1; - match['N']['R'] = -1; - match['N']['Y'] = -1; - match['N']['K'] = -1; - match['N']['M'] = -1; - match['N']['B'] = -1; - match['N']['V'] = -1; - match['N']['H'] = -1; - match['N']['D'] = -1; - match['N']['N'] = -1; - - } - - /** - * Initializes a BLOSUM45, BLOSUM62 or BLOSUM80 scoring matrix. - * - * Entries for the BLOSUM80 matrix at a scale of ln(2)/2.0. - * Source https://www.ncbi.nlm.nih.gov/IEB/ToolBox/C_DOC/lxr/source/data/BLOSUM80 - * - * Entries for the BLOSUM45 matrix at a scale of ln(2)/3.0. 
- * Source https://www.ncbi.nlm.nih.gov/IEB/ToolBox/C_DOC/lxr/source/data/BLOSUM45 - * @param number 45, 62 or 80 - */ - public static void initialize_BLOSUM_matrix(int number) { - if (number != 45 && number != 62 && number != 80){ - System.out.println("BLOSUM value must be 45, 62 or 80."); - System.exit(1); - } - match = new int[256][256]; - if (number == 62) { - match['A']['A'] = 4; - match['A']['R'] = -1; - match['A']['N'] = -2; - match['A']['D'] = -2; - match['A']['C'] = 0; - match['A']['Q'] = -1; - match['A']['E'] = -1; - match['A']['G'] = 0; - match['A']['H'] = -2; - match['A']['I'] = -1; - match['A']['L'] = -1; - match['A']['K'] = -1; - match['A']['M'] = -1; - match['A']['F'] = -2; - match['A']['P'] = -1; - match['A']['S'] = 1; - match['A']['T'] = 0; - match['A']['W'] = -3; - match['A']['Y'] = -2; - match['A']['V'] = 0; - match['A']['B'] = -2; - match['A']['Z'] = -1; - match['A']['X'] = 0; - match['A']['*'] = -4; - - match['R']['A'] = -1; - match['R']['R'] = 5; - match['R']['N'] = 0; - match['R']['D'] = -2; - match['R']['C'] = -3; - match['R']['Q'] = 1; - match['R']['E'] = 0; - match['R']['G'] = -2; - match['R']['H'] = 0; - match['R']['I'] = -3; - match['R']['L'] = -2; - match['R']['K'] = 2; - match['R']['M'] = -1; - match['R']['F'] = -3; - match['R']['P'] = -2; - match['R']['S'] = -1; - match['R']['T'] = -1; - match['R']['W'] = -3; - match['R']['Y'] = -2; - match['R']['V'] = -3; - match['R']['B'] = -1; - match['R']['Z'] = 0; - match['R']['X'] = -1; - match['R']['*'] = -4; - - match['N']['A'] = -2; - match['N']['R'] = 0; - match['N']['N'] = 6; - match['N']['D'] = 1; - match['N']['C'] = -3; - match['N']['Q'] = 0; - match['N']['E'] = 0; - match['N']['G'] = 0; - match['N']['H'] = 1; - match['N']['I'] = -3; - match['N']['L'] = -3; - match['N']['K'] = 0; - match['N']['M'] = -2; - match['N']['F'] = -3; - match['N']['P'] = -2; - match['N']['S'] = 1; - match['N']['T'] = 0; - match['N']['W'] = -4; - match['N']['Y'] = -2; - match['N']['V'] = -3; - match['N']['B'] = 3; 
- match['N']['Z'] = 0; - match['N']['X'] = -1; - match['N']['*'] = -4; - - match['D']['A'] = -2; - match['D']['R'] = -2; - match['D']['N'] = 1; - match['D']['D'] = 6; - match['D']['C'] = -3; - match['D']['Q'] = 0; - match['D']['E'] = 2; - match['D']['G'] = -1; - match['D']['H'] = -1; - match['D']['I'] = -3; - match['D']['L'] = -4; - match['D']['K'] = -1; - match['D']['M'] = -3; - match['D']['F'] = -3; - match['D']['P'] = -1; - match['D']['S'] = 0; - match['D']['T'] = -1; - match['D']['W'] = -4; - match['D']['Y'] = -3; - match['D']['V'] = -3; - match['D']['B'] = 4; - match['D']['Z'] = 1; - match['D']['X'] = -1; - match['D']['*'] = -4; - - match['C']['A'] = 0; - match['C']['R'] = -3; - match['C']['N'] = -3; - match['C']['D'] = -3; - match['C']['C'] = 9; - match['C']['Q'] = -3; - match['C']['E'] = -4; - match['C']['G'] = -3; - match['C']['H'] = -3; - match['C']['I'] = -1; - match['C']['L'] = -1; - match['C']['K'] = -3; - match['C']['M'] = -1; - match['C']['F'] = -2; - match['C']['P'] = -3; - match['C']['S'] = -1; - match['C']['T'] = -1; - match['C']['W'] = -2; - match['C']['Y'] = -2; - match['C']['V'] = -1; - match['C']['B'] = -3; - match['C']['Z'] = -3; - match['C']['X'] = -2; - match['C']['*'] = -4; - - match['Q']['A'] = -1; - match['Q']['R'] = 1; - match['Q']['N'] = 0; - match['Q']['D'] = 0; - match['Q']['C'] = -3; - match['Q']['Q'] = 5; - match['Q']['E'] = 2; - match['Q']['G'] = -2; - match['Q']['H'] = 0; - match['Q']['I'] = -3; - match['Q']['L'] = -2; - match['Q']['K'] = 1; - match['Q']['M'] = 0; - match['Q']['F'] = -3; - match['Q']['P'] = -1; - match['Q']['S'] = 0; - match['Q']['T'] = -1; - match['Q']['W'] = -2; - match['Q']['Y'] = -1; - match['Q']['V'] = -2; - match['Q']['B'] = 0; - match['Q']['Z'] = 3; - match['Q']['X'] = -1; - match['Q']['*'] = -4; - - match['E']['A'] = -1; - match['E']['R'] = 0; - match['E']['N'] = 0; - match['E']['D'] = 2; - match['E']['C'] = -4; - match['E']['Q'] = 2; - match['E']['E'] = 5; - match['E']['G'] = -2; - match['E']['H'] = 0; - 
match['E']['I'] = -3; - match['E']['L'] = -3; - match['E']['K'] = 1; - match['E']['M'] = -2; - match['E']['F'] = -3; - match['E']['P'] = -1; - match['E']['S'] = 0; - match['E']['T'] = -1; - match['E']['W'] = -3; - match['E']['Y'] = -2; - match['E']['V'] = -2; - match['E']['B'] = 1; - match['E']['Z'] = 4; - match['E']['X'] = -1; - match['E']['*'] = -4; - - match['G']['A'] = 0; - match['G']['R'] = -2; - match['G']['N'] = 0; - match['G']['D'] = -1; - match['G']['C'] = -3; - match['G']['Q'] = -2; - match['G']['E'] = -2; - match['G']['G'] = 6; - match['G']['H'] = -2; - match['G']['I'] = -4; - match['G']['L'] = -4; - match['G']['K'] = -2; - match['G']['M'] = -3; - match['G']['F'] = -3; - match['G']['P'] = -2; - match['G']['S'] = 0; - match['G']['T'] = -2; - match['G']['W'] = -2; - match['G']['Y'] = -3; - match['G']['V'] = -3; - match['G']['B'] = -1; - match['G']['Z'] = -2; - match['G']['X'] = -1; - match['G']['*'] = -4; - - match['H']['A'] = -2; - match['H']['R'] = 0; - match['H']['N'] = 1; - match['H']['D'] = -1; - match['H']['C'] = -3; - match['H']['Q'] = 0; - match['H']['E'] = 0; - match['H']['G'] = -2; - match['H']['H'] = 8; - match['H']['I'] = -3; - match['H']['L'] = -3; - match['H']['K'] = -1; - match['H']['M'] = -2; - match['H']['F'] = -1; - match['H']['P'] = -2; - match['H']['S'] = -1; - match['H']['T'] = -2; - match['H']['W'] = -2; - match['H']['Y'] = 2; - match['H']['V'] = -3; - match['H']['B'] = 0; - match['H']['Z'] = 0; - match['H']['X'] = -1; - match['H']['*'] = -4; - - match['I']['A'] = -1; - match['I']['R'] = -3; - match['I']['N'] = -3; - match['I']['D'] = -3; - match['I']['C'] = -1; - match['I']['Q'] = -3; - match['I']['E'] = -3; - match['I']['G'] = -4; - match['I']['H'] = -3; - match['I']['I'] = 4; - match['I']['L'] = 2; - match['I']['K'] = -3; - match['I']['M'] = 1; - match['I']['F'] = 0; - match['I']['P'] = -3; - match['I']['S'] = -2; - match['I']['T'] = -1; - match['I']['W'] = -3; - match['I']['Y'] = -1; - match['I']['V'] = 3; - match['I']['B'] = -3; 
- match['I']['Z'] = -3; - match['I']['X'] = -1; - match['I']['*'] = -4; - - match['L']['A'] = -1; - match['L']['R'] = -2; - match['L']['N'] = -3; - match['L']['D'] = -4; - match['L']['C'] = -1; - match['L']['Q'] = -2; - match['L']['E'] = -3; - match['L']['G'] = -4; - match['L']['H'] = -3; - match['L']['I'] = 2; - match['L']['L'] = 4; - match['L']['K'] = -2; - match['L']['M'] = 2; - match['L']['F'] = 0; - match['L']['P'] = -3; - match['L']['S'] = -2; - match['L']['T'] = -1; - match['L']['W'] = -2; - match['L']['Y'] = -1; - match['L']['V'] = 1; - match['L']['B'] = -4; - match['L']['Z'] = -3; - match['L']['X'] = -1; - match['L']['*'] = -4; - - match['K']['A'] = -1; - match['K']['R'] = 2; - match['K']['N'] = 0; - match['K']['D'] = -1; - match['K']['C'] = -3; - match['K']['Q'] = 1; - match['K']['E'] = 1; - match['K']['G'] = -2; - match['K']['H'] = -1; - match['K']['I'] = -3; - match['K']['L'] = -2; - match['K']['K'] = 5; - match['K']['M'] = -1; - match['K']['F'] = -3; - match['K']['P'] = -1; - match['K']['S'] = 0; - match['K']['T'] = -1; - match['K']['W'] = -3; - match['K']['Y'] = -2; - match['K']['V'] = -2; - match['K']['B'] = 0; - match['K']['Z'] = 1; - match['K']['X'] = -1; - match['K']['*'] = -4; - - match['M']['A'] = -1; - match['M']['R'] = -1; - match['M']['N'] = -2; - match['M']['D'] = -3; - match['M']['C'] = -1; - match['M']['Q'] = 0; - match['M']['E'] = -2; - match['M']['G'] = -3; - match['M']['H'] = -2; - match['M']['I'] = 1; - match['M']['L'] = 2; - match['M']['K'] = -1; - match['M']['M'] = 5; - match['M']['F'] = 0; - match['M']['P'] = -2; - match['M']['S'] = -1; - match['M']['T'] = -1; - match['M']['W'] = -1; - match['M']['Y'] = -1; - match['M']['V'] = 1; - match['M']['B'] = -3; - match['M']['Z'] = -1; - match['M']['X'] = -1; - match['M']['*'] = -4; - - match['F']['A'] = -2; - match['F']['R'] = -3; - match['F']['N'] = -3; - match['F']['D'] = -3; - match['F']['C'] = -2; - match['F']['Q'] = -3; - match['F']['E'] = -3; - match['F']['G'] = -3; - match['F']['H'] 
= -1; - match['F']['I'] = 0; - match['F']['L'] = 0; - match['F']['K'] = -3; - match['F']['M'] = 0; - match['F']['F'] = 6; - match['F']['P'] = -4; - match['F']['S'] = -2; - match['F']['T'] = -2; - match['F']['W'] = 1; - match['F']['Y'] = 3; - match['F']['V'] = -1; - match['F']['B'] = -3; - match['F']['Z'] = -3; - match['F']['X'] = -1; - match['F']['*'] = -4; - - match['P']['A'] = -1; - match['P']['R'] = -2; - match['P']['N'] = -2; - match['P']['D'] = -1; - match['P']['C'] = -3; - match['P']['Q'] = -1; - match['P']['E'] = -1; - match['P']['G'] = -2; - match['P']['H'] = -2; - match['P']['I'] = -3; - match['P']['L'] = -3; - match['P']['K'] = -1; - match['P']['M'] = -2; - match['P']['F'] = -4; - match['P']['P'] = 7; - match['P']['S'] = -1; - match['P']['T'] = -1; - match['P']['W'] = -4; - match['P']['Y'] = -3; - match['P']['V'] = -2; - match['P']['B'] = -2; - match['P']['Z'] = -1; - match['P']['X'] = -2; - match['P']['*'] = -4; - - match['S']['A'] = 1; - match['S']['R'] = -1; - match['S']['N'] = 1; - match['S']['D'] = 0; - match['S']['C'] = -1; - match['S']['Q'] = 0; - match['S']['E'] = 0; - match['S']['G'] = 0; - match['S']['H'] = -1; - match['S']['I'] = -2; - match['S']['L'] = -2; - match['S']['K'] = 0; - match['S']['M'] = -1; - match['S']['F'] = -2; - match['S']['P'] = -1; - match['S']['S'] = 4; - match['S']['T'] = 1; - match['S']['W'] = -3; - match['S']['Y'] = -2; - match['S']['V'] = -2; - match['S']['B'] = 0; - match['S']['Z'] = 0; - match['S']['X'] = 0; - match['S']['*'] = -4; - - match['T']['A'] = 0; - match['T']['R'] = -1; - match['T']['N'] = 0; - match['T']['D'] = -1; - match['T']['C'] = -1; - match['T']['Q'] = -1; - match['T']['E'] = -1; - match['T']['G'] = -2; - match['T']['H'] = -2; - match['T']['I'] = -1; - match['T']['L'] = -1; - match['T']['K'] = -1; - match['T']['M'] = -1; - match['T']['F'] = -2; - match['T']['P'] = -1; - match['T']['S'] = 1; - match['T']['T'] = 5; - match['T']['W'] = -2; - match['T']['Y'] = -2; - match['T']['V'] = 0; - match['T']['B'] = 
-1; - match['T']['Z'] = -1; - match['T']['X'] = 0; - match['T']['*'] = -4; - - match['W']['A'] = -3; - match['W']['R'] = -3; - match['W']['N'] = -4; - match['W']['D'] = -4; - match['W']['C'] = -2; - match['W']['Q'] = -2; - match['W']['E'] = -3; - match['W']['G'] = -2; - match['W']['H'] = -2; - match['W']['I'] = -3; - match['W']['L'] = -2; - match['W']['K'] = -3; - match['W']['M'] = -1; - match['W']['F'] = 1; - match['W']['P'] = -4; - match['W']['S'] = -3; - match['W']['T'] = -2; - match['W']['W'] = 11; - match['W']['Y'] = 2; - match['W']['V'] = -3; - match['W']['B'] = -4; - match['W']['Z'] = -3; - match['W']['X'] = -2; - match['W']['*'] = -4; - - match['Y']['A'] = -2; - match['Y']['R'] = -2; - match['Y']['N'] = -2; - match['Y']['D'] = -3; - match['Y']['C'] = -2; - match['Y']['Q'] = -1; - match['Y']['E'] = -2; - match['Y']['G'] = -3; - match['Y']['H'] = 2; - match['Y']['I'] = -1; - match['Y']['L'] = -1; - match['Y']['K'] = -2; - match['Y']['M'] = -1; - match['Y']['F'] = 3; - match['Y']['P'] = -3; - match['Y']['S'] = -2; - match['Y']['T'] = -2; - match['Y']['W'] = 2; - match['Y']['Y'] = 7; - match['Y']['V'] = -1; - match['Y']['B'] = -3; - match['Y']['Z'] = -2; - match['Y']['X'] = -1; - match['Y']['*'] = -4; - - match['V']['A'] = 0; - match['V']['R'] = -3; - match['V']['N'] = -3; - match['V']['D'] = -3; - match['V']['C'] = -1; - match['V']['Q'] = -2; - match['V']['E'] = -2; - match['V']['G'] = -3; - match['V']['H'] = -3; - match['V']['I'] = 3; - match['V']['L'] = 1; - match['V']['K'] = -2; - match['V']['M'] = 1; - match['V']['F'] = -1; - match['V']['P'] = -2; - match['V']['S'] = -2; - match['V']['T'] = 0; - match['V']['W'] = -3; - match['V']['Y'] = -1; - match['V']['V'] = 4; - match['V']['B'] = -3; - match['V']['Z'] = -2; - match['V']['X'] = -1; - match['V']['*'] = -4; - - match['B']['A'] = -2; - match['B']['R'] = -1; - match['B']['N'] = 3; - match['B']['D'] = 4; - match['B']['C'] = -3; - match['B']['Q'] = 0; - match['B']['E'] = 1; - match['B']['G'] = -1; - 
match['B']['H'] = 0; - match['B']['I'] = -3; - match['B']['L'] = -4; - match['B']['K'] = 0; - match['B']['M'] = -3; - match['B']['F'] = -3; - match['B']['P'] = -2; - match['B']['S'] = 0; - match['B']['T'] = -1; - match['B']['W'] = -4; - match['B']['Y'] = -3; - match['B']['V'] = -3; - match['B']['B'] = 4; - match['B']['Z'] = 1; - match['B']['X'] = -1; - match['B']['*'] = -4; - - match['Z']['A'] = -1; - match['Z']['R'] = 0; - match['Z']['N'] = 0; - match['Z']['D'] = 1; - match['Z']['C'] = -3; - match['Z']['Q'] = 3; - match['Z']['E'] = 4; - match['Z']['G'] = -2; - match['Z']['H'] = 0; - match['Z']['I'] = -3; - match['Z']['L'] = -3; - match['Z']['K'] = 1; - match['Z']['M'] = -1; - match['Z']['F'] = -3; - match['Z']['P'] = -1; - match['Z']['S'] = 0; - match['Z']['T'] = -1; - match['Z']['W'] = -3; - match['Z']['Y'] = -2; - match['Z']['V'] = -2; - match['Z']['B'] = 1; - match['Z']['Z'] = 4; - match['Z']['X'] = -1; - match['Z']['*'] = -4; - - match['X']['A'] = 0; - match['X']['R'] = -1; - match['X']['N'] = -1; - match['X']['D'] = -1; - match['X']['C'] = -2; - match['X']['Q'] = -1; - match['X']['E'] = -1; - match['X']['G'] = -1; - match['X']['H'] = -1; - match['X']['I'] = -1; - match['X']['L'] = -1; - match['X']['K'] = -1; - match['X']['M'] = -1; - match['X']['F'] = -1; - match['X']['P'] = -2; - match['X']['S'] = 0; - match['X']['T'] = 0; - match['X']['W'] = -2; - match['X']['Y'] = -1; - match['X']['V'] = -1; - match['X']['B'] = -1; - match['X']['Z'] = -1; - match['X']['X'] = -1; - match['X']['*'] = -4; - - match['*']['A'] = -4; - match['*']['R'] = -4; - match['*']['N'] = -4; - match['*']['D'] = -4; - match['*']['C'] = -4; - match['*']['Q'] = -4; - match['*']['E'] = -4; - match['*']['G'] = -4; - match['*']['H'] = -4; - match['*']['I'] = -4; - match['*']['L'] = -4; - match['*']['K'] = -4; - match['*']['M'] = -4; - match['*']['F'] = -4; - match['*']['P'] = -4; - match['*']['S'] = -4; - match['*']['T'] = -4; - match['*']['W'] = -4; - match['*']['Y'] = -4; - match['*']['V'] = 
-4; - match['*']['B'] = -4; - match['*']['Z'] = -4; - match['*']['X'] = -4; - match['*']['*'] = 1; - } - - if (number == 80) { - match['A']['A'] = 5; - match['A']['R'] = -2; - match['A']['N'] = -2; - match['A']['D'] = -2; - match['A']['C'] = -1; - match['A']['Q'] = -1; - match['A']['E'] = -1; - match['A']['G'] = 0; - match['A']['H'] = -2; - match['A']['I'] = -2; - match['A']['L'] = -2; - match['A']['K'] = -1; - match['A']['M'] = -1; - match['A']['F'] = -3; - match['A']['P'] = -1; - match['A']['S'] = 1; - match['A']['T'] = 0; - match['A']['W'] = -3; - match['A']['Y'] = -2; - match['A']['V'] = 0; - match['A']['B'] = -2; - match['A']['J'] = -2; - match['A']['Z'] = -1; - match['A']['X'] = -1; - match['A']['*'] = -6; - - match['R']['A'] = -2; - match['R']['R'] = 6; - match['R']['N'] = -1; - match['R']['D'] = -2; - match['R']['C'] = -4; - match['R']['Q'] = 1; - match['R']['E'] = -1; - match['R']['G'] = -3; - match['R']['H'] = 0; - match['R']['I'] = -3; - match['R']['L'] = -3; - match['R']['K'] = 2; - match['R']['M'] = -2; - match['R']['F'] = -4; - match['R']['P'] = -2; - match['R']['S'] = -1; - match['R']['T'] = -1; - match['R']['W'] = -4; - match['R']['Y'] = -3; - match['R']['V'] = -3; - match['R']['B'] = -1; - match['R']['J'] = -3; - match['R']['Z'] = 0; - match['R']['X'] = -1; - match['R']['*'] = -6; - - match['N']['A'] = -2; - match['N']['R'] = -1; - match['N']['N'] = 6; - match['N']['D'] = 1; - match['N']['C'] = -3; - match['N']['Q'] = 0; - match['N']['E'] = -1; - match['N']['G'] = -1; - match['N']['H'] = 0; - match['N']['I'] = -4; - match['N']['L'] = -4; - match['N']['K'] = 0; - match['N']['M'] = -3; - match['N']['F'] = -4; - match['N']['P'] = -3; - match['N']['S'] = 0; - match['N']['T'] = 0; - match['N']['W'] = -4; - match['N']['Y'] = -3; - match['N']['V'] = -4; - match['N']['B'] = 5; - match['N']['J'] = -4; - match['N']['Z'] = 0; - match['N']['X'] = -1; - match['N']['*'] = -6; - - match['D']['A'] = -2; - match['D']['R'] = -2; - match['D']['N'] = 1; - 
match['D']['D'] = 6; - match['D']['C'] = -4; - match['D']['Q'] = -1; - match['D']['E'] = 1; - match['D']['G'] = -2; - match['D']['H'] = -2; - match['D']['I'] = -4; - match['D']['L'] = -5; - match['D']['K'] = -1; - match['D']['M'] = -4; - match['D']['F'] = -4; - match['D']['P'] = -2; - match['D']['S'] = -1; - match['D']['T'] = -1; - match['D']['W'] = -6; - match['D']['Y'] = -4; - match['D']['V'] = -4; - match['D']['B'] = 5; - match['D']['J'] = -5; - match['D']['Z'] = 1; - match['D']['X'] = -1; - match['D']['*'] = -6; - - match['C']['A'] = -1; - match['C']['R'] = -4; - match['C']['N'] = -3; - match['C']['D'] = -4; - match['C']['C'] = 9; - match['C']['Q'] = -4; - match['C']['E'] = -5; - match['C']['G'] = -4; - match['C']['H'] = -4; - match['C']['I'] = -2; - match['C']['L'] = -2; - match['C']['K'] = -4; - match['C']['M'] = -2; - match['C']['F'] = -3; - match['C']['P'] = -4; - match['C']['S'] = -2; - match['C']['T'] = -1; - match['C']['W'] = -3; - match['C']['Y'] = -3 ; - match['C']['V'] = -1; - match['C']['B'] = -4; - match['C']['J'] = -2 ; - match['C']['Z'] = -4; - match['C']['X'] = -1; - match['C']['*'] = -6; - - match['Q']['A'] = -1; - match['Q']['R'] = 1; - match['Q']['N'] = 0; - match['Q']['D'] = -1; - match['Q']['C'] = -4; - match['Q']['Q'] = 6; - match['Q']['E'] = 2; - match['Q']['G'] = -2; - match['Q']['H'] = 1; - match['Q']['I'] = -3; - match['Q']['L'] = -3; - match['Q']['K'] = 1; - match['Q']['M'] = 0; - match['Q']['F'] = -4; - match['Q']['P'] = -2; - match['Q']['S'] = 0; - match['Q']['T'] = -1; - match['Q']['W'] = -3; - match['Q']['Y'] = -2; - match['Q']['V'] = -3; - match['Q']['B'] = 0; - match['Q']['J'] = -3; - match['Q']['Z'] = 4; - match['Q']['X'] = -1; - match['Q']['*'] = -6; - - match['E']['A'] = -1; - match['E']['R'] = -1; - match['E']['N'] = -1; - match['E']['D'] = 1; - match['E']['C'] = -5; - match['E']['Q'] = 2; - match['E']['E'] = 6; - match['E']['G'] = -3; - match['E']['H'] = 0; - match['E']['I'] = -4; - match['E']['L'] = -4; - match['E']['K'] = 
1; - match['E']['M'] = -2; - match['E']['F'] = -4; - match['E']['P'] = -2; - match['E']['S'] = 0; - match['E']['T'] = -1; - match['E']['W'] = -4 ; - match['E']['Y'] = -3; - match['E']['V'] = -3; - match['E']['B'] = 1; - match['E']['J'] = -4; - match['E']['Z'] = 5; - match['E']['X'] = -1; - match['E']['*'] = -6; - - match['G']['A'] = 0; - match['G']['R'] = -3; - match['G']['N'] = -1; - match['G']['D'] = -2; - match['G']['C'] = -4; - match['G']['Q'] = -2; - match['G']['E'] = -3; - match['G']['G'] = 6; - match['G']['H'] = -3; - match['G']['I'] = -5; - match['G']['L'] = -4; - match['G']['K'] = -2; - match['G']['M'] = -4; - match['G']['F'] = -4; - match['G']['P'] = -3; - match['G']['S'] = -1; - match['G']['T'] = -2; - match['G']['W'] = -4; - match['G']['Y'] = -4; - match['G']['V'] = -4; - match['G']['B'] = -1; - match['G']['J'] = -5; - match['G']['Z'] = -3; - match['G']['X'] = -1; - match['G']['*'] = -6; - - match['H']['A'] = -2; - match['H']['R'] = 0; - match['H']['N'] = 0; - match['H']['D'] = -2; - match['H']['C'] = -4; - match['H']['Q'] = 1; - match['H']['E'] = 0; - match['H']['G'] = -3; - match['H']['H'] = 8; - match['H']['I'] = -4; - match['H']['L'] = -3; - match['H']['K'] = -1; - match['H']['M'] = -2; - match['H']['F'] = -2; - match['H']['P'] = -3; - match['H']['S'] = -1; - match['H']['T'] = -2; - match['H']['W'] = -3; - match['H']['Y'] = 2; - match['H']['V'] = -4; - match['H']['B'] = -1; - match['H']['J'] = -4; - match['H']['Z'] = 0; - match['H']['X'] = -1; - match['H']['*'] = -6; - - match['I']['A'] = -2; - match['I']['R'] = -3; - match['I']['N'] = -4; - match['I']['D'] = -4; - match['I']['C'] = -2; - match['I']['Q'] = -3; - match['I']['E'] = -4; - match['I']['G'] = -5; - match['I']['H'] = -4; - match['I']['I'] = 5; - match['I']['L'] = 1; - match['I']['K'] = -3; - match['I']['M'] = 1; - match['I']['F'] = -1; - match['I']['P'] = -4; - match['I']['S'] = -3; - match['I']['T'] = -1; - match['I']['W'] = -3; - match['I']['Y'] = -2; - match['I']['V'] = 3; - 
match['I']['B'] = -4; - match['I']['J'] = 3; - match['I']['Z'] = -4; - match['I']['X'] = -1; - match['I']['*'] = -6; - - match['L']['A'] = -2; - match['L']['R'] = -3; - match['L']['N'] = -4; - match['L']['D'] = -5; - match['L']['C'] = -2; - match['L']['Q'] = -3; - match['L']['E'] = -4; - match['L']['G'] = -4; - match['L']['H'] = -3; - match['L']['I'] = 1; - match['L']['L'] = 4; - match['L']['K'] = -3; - match['L']['M'] = 2; - match['L']['F'] = 0; - match['L']['P'] = -3; - match['L']['S'] = -3; - match['L']['T'] = -2; - match['L']['W'] = -2; - match['L']['Y'] = -2; - match['L']['V'] = 1; - match['L']['B'] = -4; - match['L']['J'] = 3; - match['L']['Z'] = -3; - match['L']['X'] = -1; - match['L']['*'] = -6; - - match['K']['A'] = -1; - match['K']['R'] = 2; - match['K']['N'] = 0; - match['K']['D'] = -1; - match['K']['C'] = -4; - match['K']['Q'] = 1; - match['K']['E'] = 1; - match['K']['G'] = -2; - match['K']['H'] = -1; - match['K']['I'] = -3; - match['K']['L'] = -3; - match['K']['K'] = 5; - match['K']['M'] = -2; - match['K']['F'] = -4; - match['K']['P'] = -1; - match['K']['S'] = -1; - match['K']['T'] = -1; - match['K']['W'] = -4; - match['K']['Y'] = -3; - match['K']['V'] = -3; - match['K']['B'] = -1; - match['K']['J'] = -3; - match['K']['Z'] = 1; - match['K']['X'] = -1; - match['K']['*'] = -6; - - match['M']['A'] = -1; - match['M']['R'] = -2; - match['M']['N'] = -3; - match['M']['D'] = -4; - match['M']['C'] = -2; - match['M']['Q'] = 0; - match['M']['E'] = -2; - match['M']['G'] = -4; - match['M']['H'] = -2; - match['M']['I'] = 1; - match['M']['L'] = 2; - match['M']['K'] = -2; - match['M']['M'] = 6; - match['M']['F'] = 0; - match['M']['P'] = -3; - match['M']['S'] = -2; - match['M']['T'] = -1; - match['M']['W'] = -2; - match['M']['Y'] = -2; - match['M']['V'] = 1; - match['M']['B'] = -3; - match['M']['J'] = 2; - match['M']['Z'] = -1; - match['M']['X'] = -1; - match['M']['*'] = -6; - - match['F']['A'] = -3; - match['F']['R'] = -4; - match['F']['N'] = -4; - match['F']['D'] = 
-4; - match['F']['C'] = -3; - match['F']['Q'] = -4; - match['F']['E'] = -4; - match['F']['G'] = -4; - match['F']['H'] = -2; - match['F']['I'] = -1; - match['F']['L'] = 0; - match['F']['K'] = -4; - match['F']['M'] = 0; - match['F']['F'] = 6; - match['F']['P'] = -4; - match['F']['S'] = -3; - match['F']['T'] = -2; - match['F']['W'] = 0; - match['F']['Y'] = 3; - match['F']['V'] = -1; - match['F']['B'] = -4; - match['F']['J'] = 0; - match['F']['Z'] = -4; - match['F']['X'] = -1; - match['F']['*'] = -6; - - match['P']['A'] = -1; - match['P']['R'] = -2; - match['P']['N'] = -3; - match['P']['D'] = -2; - match['P']['C'] = -4 ; - match['P']['Q'] = -2; - match['P']['E'] = -2 ; - match['P']['G'] = -3; - match['P']['H'] = -3; - match['P']['I'] = -4; - match['P']['L'] = -3 ; - match['P']['K'] = -1; - match['P']['M'] = -3; - match['P']['F'] = -4 ; - match['P']['P'] = 8; - match['P']['S'] = -1; - match['P']['T'] = -2; - match['P']['W'] = -5; - match['P']['Y'] = -4; - match['P']['V'] = -3; - match['P']['B'] = -2; - match['P']['J'] = -4; - match['P']['Z'] = -2; - match['P']['X'] = -1; - match['P']['*'] = -6; - - match['S']['A'] = 1; - match['S']['R'] = -1; - match['S']['N'] = 0; - match['S']['D'] = -1; - match['S']['C'] = -2; - match['S']['Q'] = 0; - match['S']['E'] = 0; - match['S']['G'] = -1; - match['S']['H'] = -1; - match['S']['I'] = -3; - match['S']['L'] = -3; - match['S']['K'] = -1; - match['S']['M'] = -2; - match['S']['F'] = -3; - match['S']['P'] = -1; - match['S']['S'] = 5; - match['S']['T'] = 1; - match['S']['W'] = -4; - match['S']['Y'] = -2; - match['S']['V'] = -2; - match['S']['B'] = 0; - match['S']['J'] = -3; - match['S']['Z'] = 0; - match['S']['X'] = -1; - match['S']['*'] = -6; - - match['T']['A'] = 0; - match['T']['R'] = -1; - match['T']['N'] = 0; - match['T']['D'] = -1; - match['T']['C'] = -1; - match['T']['Q'] = -1; - match['T']['E'] = -1; - match['T']['G'] = -2; - match['T']['H'] = -2; - match['T']['I'] = -1; - match['T']['L'] = -2; - match['T']['K'] = -1 ; - 
match['T']['M'] = -1; - match['T']['F'] = -2; - match['T']['P'] = -2; - match['T']['S'] = 1; - match['T']['T'] = 5; - match['T']['W'] = -4; - match['T']['Y'] = -2; - match['T']['V'] = 0; - match['T']['B'] = -1; - match['T']['J'] = -1; - match['T']['Z'] = -1 ; - match['T']['X'] = -1; - match['T']['*'] = -6; - - match['W']['A'] = -3; - match['W']['R'] = -4; - match['W']['N'] = -4; - match['W']['D'] = -6; - match['W']['C'] = -3; - match['W']['Q'] = -3; - match['W']['E'] = -4; - match['W']['G'] = -4; - match['W']['H'] = -3; - match['W']['I'] = -3; - match['W']['L'] = -2; - match['W']['K'] = -4; - match['W']['M'] = -2 ; - match['W']['F'] = 0; - match['W']['P'] = -5; - match['W']['S'] = -4; - match['W']['T'] = -4; - match['W']['W'] = 11; - match['W']['Y'] = 2; - match['W']['V'] = -3; - match['W']['B'] = -5; - match['W']['J'] = -3; - match['W']['Z'] = -3; - match['W']['X'] = -1; - match['W']['*'] = -6; - - match['Y']['A'] = -2 ; - match['Y']['R'] = -3; - match['Y']['N'] = -3; - match['Y']['D'] = -4; - match['Y']['C'] = -3; - match['Y']['Q'] = -2; - match['Y']['E'] = -3; - match['Y']['G'] = -4; - match['Y']['H'] = 2; - match['Y']['I'] = -2; - match['Y']['L'] = -2; - match['Y']['K'] = -3; - match['Y']['M'] = -2; - match['Y']['F'] = 3; - match['Y']['P'] = -4; - match['Y']['S'] = -2; - match['Y']['T'] = -2; - match['Y']['W'] = 2; - match['Y']['Y'] = 7; - match['Y']['V'] = -2; - match['Y']['B'] = -3; - match['Y']['J'] = -2; - match['Y']['Z'] = -3; - match['Y']['X'] = -1; - match['Y']['*'] = -6; - - match['V']['A'] = 0; - match['V']['R'] = -3; - match['V']['N'] = -4; - match['V']['D'] = -4; - match['V']['C'] = -1; - match['V']['Q'] = -3; - match['V']['E'] = -3; - match['V']['G'] = -4; - match['V']['H'] = -4; - match['V']['I'] = 3; - match['V']['L'] = 1; - match['V']['K'] = -3; - match['V']['M'] = 1; - match['V']['F'] = -1; - match['V']['P'] = -3; - match['V']['S'] = -2; - match['V']['T'] = 0; - match['V']['W'] = -3; - match['V']['Y'] = -2; - match['V']['V'] = 4; - 
match['V']['B'] = -4; - match['V']['J'] = 2; - match['V']['Z'] = -3; - match['V']['X'] = -1; - match['V']['*'] = -6; - - match['B']['A'] = -2; - match['B']['R'] = -1; - match['B']['N'] = 5; - match['B']['D'] = 5; - match['B']['C'] = -4; - match['B']['Q'] = 0; - match['B']['E'] = 1; - match['B']['G'] = -1; - match['B']['H'] = -1; - match['B']['I'] = -4; - match['B']['L'] = -4; - match['B']['K'] = -1 ; - match['B']['M'] = -3; - match['B']['F'] = -4; - match['B']['P'] = -2; - match['B']['S'] = 0; - match['B']['T'] = -1; - match['B']['W'] = -5 ; - match['B']['Y'] = -3 ; - match['B']['V'] = -4; - match['B']['B'] = 5; - match['B']['J'] = -4; - match['B']['Z'] = 0; - match['B']['X'] = -1; - match['B']['*'] = -6; - - match['J']['A'] = -2; - match['J']['R'] = -3; - match['J']['N'] = -4 ; - match['J']['D'] = -5; - match['J']['C'] = -2; - match['J']['Q'] = -3; - match['J']['E'] = -4; - match['J']['G'] = -5; - match['J']['H'] = -4 ; - match['J']['I'] = 3; - match['J']['L'] = 3; - match['J']['K'] = -3; - match['J']['M'] = 2; - match['J']['F'] = 0; - match['J']['P'] = -4; - match['J']['S'] = -3 ; - match['J']['T'] = -1; - match['J']['W'] = -3; - match['J']['Y'] = -2; - match['J']['V'] = 2; - match['J']['B'] = -4; - match['J']['J'] = 3; - match['J']['Z'] = -3; - match['J']['X'] = -1; - match['J']['*'] = -6; - - match['Z']['A'] = -1; - match['Z']['R'] = 0; - match['Z']['N'] = 0; - match['Z']['D'] = 1; - match['Z']['C'] = -4; - match['Z']['Q'] = 4; - match['Z']['E'] = 5; - match['Z']['G'] = -3; - match['Z']['H'] = 0; - match['Z']['I'] = -4; - match['Z']['L'] = -3; - match['Z']['K'] = 1; - match['Z']['M'] = -1; - match['Z']['F'] = -4; - match['Z']['P'] = -2; - match['Z']['S'] = 0; - match['Z']['T'] = -1; - match['Z']['W'] = -3; - match['Z']['Y'] = -3; - match['Z']['V'] = -3; - match['Z']['B'] = 0; - match['Z']['J'] = -3; - match['Z']['Z'] = 5; - match['Z']['X'] = -1; - match['Z']['*'] = -6; - - match['X']['A'] = -1; - match['X']['R'] = -1; - match['X']['N'] = -1; - match['X']['D'] = 
-1; - match['X']['C'] = -1; - match['X']['Q'] = -1; - match['X']['E'] = -1; - match['X']['G'] = -1; - match['X']['H'] = -1; - match['X']['I'] = -1; - match['X']['L'] = -1; - match['X']['K'] = -1; - match['X']['M'] = -1; - match['X']['F'] = -1; - match['X']['P'] = -1; - match['X']['S'] = -1; - match['X']['T'] = -1; - match['X']['W'] = -1; - match['X']['Y'] = -1; - match['X']['V'] = -1; - match['X']['B'] = -1; - match['X']['J'] = -1; - match['X']['Z'] = -1; - match['X']['X'] = -1; - match['X']['*'] = -6; - - match['*']['A'] = -6; - match['*']['R'] = -6; - match['*']['N'] = -6; - match['*']['D'] = -6; - match['*']['C'] = -6; - match['*']['Q'] = -6; - match['*']['E'] = -6; - match['*']['G'] = -6; - match['*']['H'] = -6; - match['*']['I'] = -6; - match['*']['L'] = -6; - match['*']['K'] = -6; - match['*']['M'] = -6; - match['*']['F'] = -6; - match['*']['P'] = -6; - match['*']['S'] = -6; - match['*']['T'] = -6; - match['*']['W'] = -6; - match['*']['Y'] = -6; - match['*']['V'] = -6; - match['*']['B'] = -6; - match['*']['J'] = -6; - match['*']['Z'] = -6; - match['*']['X'] = -6; - match['*']['*'] = 1; - } - - if (number == 45) { - match['A']['A'] = 5; - match['A']['R'] = -2; - match['A']['N'] = -1; - match['A']['D'] = -2; - match['A']['C'] = -1; - match['A']['Q'] = -1 ; - match['A']['E'] = -1; - match['A']['G'] = 0; - match['A']['H'] = -2; - match['A']['I'] = -1; - match['A']['L'] = -1; - match['A']['K'] = -1; - match['A']['M'] = -1; - match['A']['F'] = -2; - match['A']['P'] = -1; - match['A']['S'] = 1; - match['A']['T'] = 0; - match['A']['W'] = -2; - match['A']['Y'] = -2; - match['A']['V'] = 0; - match['A']['B'] = -1; - match['A']['J'] = -1; - match['A']['Z'] = -1; - match['A']['X'] = -1; - match['A']['*'] = -5; - - match['R']['A'] = -2; - match['R']['R'] = 7 ; - match['R']['N'] = 0; - match['R']['D'] = -1; - match['R']['C'] = -3; - match['R']['Q'] = 1; - match['R']['E'] = 0; - match['R']['G'] = -2; - match['R']['H'] = 0; - match['R']['I'] = -3; - match['R']['L'] = -2; - 
match['R']['K'] = 3; - match['R']['M'] = -1; - match['R']['F'] = -2; - match['R']['P'] = -2; - match['R']['S'] = -1; - match['R']['T'] = -1; - match['R']['W'] = -2; - match['R']['Y'] = -1; - match['R']['V'] = -2; - match['R']['B'] = -1; - match['R']['J'] = -3 ; - match['R']['Z'] = 1; - match['R']['X'] = -1; - match['R']['*'] = -5; - - match['N']['A'] = -1; - match['N']['R'] = 0; - match['N']['N'] = 6; - match['N']['D'] = 2; - match['N']['C'] = -2; - match['N']['Q'] = 0; - match['N']['E'] = 0; - match['N']['G'] = 0; - match['N']['H'] = 1; - match['N']['I'] = -2; - match['N']['L'] = -3; - match['N']['K'] = 0; - match['N']['M'] = -2; - match['N']['F'] = -2; - match['N']['P'] = -2; - match['N']['S'] = 1; - match['N']['T'] = 0; - match['N']['W'] = -4; - match['N']['Y'] = -2; - match['N']['V'] = -3; - match['N']['B'] = 5; - match['N']['J'] = -3; - match['N']['Z'] = 0; - match['N']['X'] = -1; - match['N']['*'] = -5; - - match['D']['A'] = -2; - match['D']['R'] = -1; - match['D']['N'] = 2; - match['D']['D'] = 7; - match['D']['C'] = -3; - match['D']['Q'] = 0; - match['D']['E'] = 2; - match['D']['G'] = -1; - match['D']['H'] = 0; - match['D']['I'] = -4; - match['D']['L'] = -3; - match['D']['K'] = 0; - match['D']['M'] = -3; - match['D']['F'] = -4; - match['D']['P'] = -1; - match['D']['S'] = 0; - match['D']['T'] = -1; - match['D']['W'] = -4; - match['D']['Y'] = -2; - match['D']['V'] = -3; - match['D']['B'] = 6; - match['D']['J'] = -3; - match['D']['Z'] = 1; - match['D']['X'] = -1; - match['D']['*'] = -5; - - match['C']['A'] = -1 ; - match['C']['R'] = -3; - match['C']['N'] = -2; - match['C']['D'] = -3; - match['C']['C'] = 12; - match['C']['Q'] = -3 ; - match['C']['E'] = -3 ; - match['C']['G'] = -3; - match['C']['H'] = -3; - match['C']['I'] = -3; - match['C']['L'] = -2 ; - match['C']['K'] = -3; - match['C']['M'] = -2; - match['C']['F'] = -2; - match['C']['P'] = -4 ; - match['C']['S'] = -1; - match['C']['T'] = -1; - match['C']['W'] = -5; - match['C']['Y'] = -3; - match['C']['V'] = 
-1; - match['C']['B'] = -2; - match['C']['J'] = -2; - match['C']['Z'] = -3; - match['C']['X'] = -1; - match['C']['*'] = -5; - - match['Q']['A'] = -1 ; - match['Q']['R'] = 1; - match['Q']['N'] = 0; - match['Q']['D'] = 0; - match['Q']['C'] = -3; - match['Q']['Q'] = 6; - match['Q']['E'] = 2; - match['Q']['G'] = -2; - match['Q']['H'] = 1; - match['Q']['I'] = -2; - match['Q']['L'] = -2 ; - match['Q']['K'] = 1 ; - match['Q']['M'] = 0; - match['Q']['F'] = -4; - match['Q']['P'] = -1; - match['Q']['S'] = 0; - match['Q']['T'] = -1; - match['Q']['W'] = -2; - match['Q']['Y'] = -1; - match['Q']['V'] = -3; - match['Q']['B'] = 0; - match['Q']['J'] = -2; - match['Q']['Z'] = 4; - match['Q']['X'] = -1; - match['Q']['*'] = -5; - - match['E']['A'] = 1; - match['E']['R'] = 0; - match['E']['N'] = 0; - match['E']['D'] = 2; - match['E']['C'] = -3; - match['E']['Q'] = 2; - match['E']['E'] = 6; - match['E']['G'] = -2; - match['E']['H'] = 0 ; - match['E']['I'] = -3; - match['E']['L'] = -2; - match['E']['K'] = 1; - match['E']['M'] = -2; - match['E']['F'] = -3; - match['E']['P'] = 0; - match['E']['S'] = 0; - match['E']['T'] = -1; - match['E']['W'] = -3; - match['E']['Y'] = -2; - match['E']['V'] = -3; - match['E']['B'] = 1; - match['E']['J'] = -3; - match['E']['Z'] = 5; - match['E']['X'] = -1; - match['E']['*'] = -5; - - match['G']['A'] = 0; - match['G']['R'] = -2; - match['G']['N'] = 0; - match['G']['D'] = -1; - match['G']['C'] = -3; - match['G']['Q'] = -2; - match['G']['E'] = -2; - match['G']['G'] = 7; - match['G']['H'] = -2; - match['G']['I'] = -4; - match['G']['L'] = -3; - match['G']['K'] = -2; - match['G']['M'] = -2; - match['G']['F'] = -3; - match['G']['P'] = -2; - match['G']['S'] = 0; - match['G']['T'] = -2; - match['G']['W'] = -2; - match['G']['Y'] = -3; - match['G']['V'] = -3; - match['G']['B'] = -1; - match['G']['J'] = -4; - match['G']['Z'] = -2; - match['G']['X'] = -1; - match['G']['*'] = -5; - - match['H']['A'] = -2 ; - match['H']['R'] = 0; - match['H']['N'] = 1; - match['H']['D'] = 
0; - match['H']['C'] = -3; - match['H']['Q'] = 1; - match['H']['E'] = 0; - match['H']['G'] = -2; - match['H']['H'] = 10; - match['H']['I'] = -3; - match['H']['L'] = -2; - match['H']['K'] = -1; - match['H']['M'] = 0; - match['H']['F'] = -2; - match['H']['P'] = -2; - match['H']['S'] = -1; - match['H']['T'] = -2; - match['H']['W'] = -3; - match['H']['Y'] = 2; - match['H']['V'] = -3; - match['H']['B'] = 0; - match['H']['J'] = -2; - match['H']['Z'] = 0; - match['H']['X'] = -1; - match['H']['*'] = -5; - - match['I']['A'] = -1; - match['I']['R'] = -3; - match['I']['N'] = -2; - match['I']['D'] = -4 ; - match['I']['C'] = -3; - match['I']['Q'] = -2; - match['I']['E'] = -3; - match['I']['G'] = -4; - match['I']['H'] = -3; - match['I']['I'] = 5 ; - match['I']['L'] = 2; - match['I']['K'] = -3 ; - match['I']['M'] = 2; - match['I']['F'] = 0; - match['I']['P'] = -2 ; - match['I']['S'] = -2; - match['I']['T'] = -1; - match['I']['W'] = -2; - match['I']['Y'] = 0; - match['I']['V'] = 3; - match['I']['B'] = -3; - match['I']['J'] = 4; - match['I']['Z'] = -3; - match['I']['X'] = -1; - match['I']['*'] = -5; - - match['L']['A'] = -1; - match['L']['R'] = -2; - match['L']['N'] = -3; - match['L']['D'] = -3; - match['L']['C'] = -2 ; - match['L']['Q'] = -2; - match['L']['E'] = -2; - match['L']['G'] = -3 ; - match['L']['H'] = -2; - match['L']['I'] = 2; - match['L']['L'] = 5 ; - match['L']['K'] = -3; - match['L']['M'] = 2; - match['L']['F'] = 1; - match['L']['P'] = -3; - match['L']['S'] = -3; - match['L']['T'] = -1; - match['L']['W'] = -2 ; - match['L']['Y'] = 0; - match['L']['V'] = 1; - match['L']['B'] = -3; - match['L']['J'] = 4; - match['L']['Z'] = -2; - match['L']['X'] = -1; - match['L']['*'] = -5; - - match['K']['A'] = -1; - match['K']['R'] = 3; - match['K']['N'] = 0 ; - match['K']['D'] = 0; - match['K']['C'] = -3 ; - match['K']['Q'] = 1; - match['K']['E'] = 1; - match['K']['G'] = -2; - match['K']['H'] = -1 ; - match['K']['I'] = -3; - match['K']['L'] = -3; - match['K']['K'] = 5; - 
match['K']['M'] =-1; - match['K']['F'] = -3 ; - match['K']['P'] = -1 ; - match['K']['S'] = -1; - match['K']['T'] = -1; - match['K']['W'] = -2 ; - match['K']['Y'] = -1; - match['K']['V'] = -2; - match['K']['B'] = 0; - match['K']['J'] = -3; - match['K']['Z'] = 1; - match['K']['X'] = -1; - match['K']['*'] = -5; - - match['M']['A'] = -1; - match['M']['R'] = -1; - match['M']['N'] = -2 ; - match['M']['D'] = -3; - match['M']['C'] = -2; - match['M']['Q'] = 0; - match['M']['E'] = -2; - match['M']['G'] = -2; - match['M']['H'] = 0; - match['M']['I'] = 2; - match['M']['L'] = 2; - match['M']['K'] = -1 ; - match['M']['M'] = 6; - match['M']['F'] = 0; - match['M']['P'] = -2; - match['M']['S'] = -2; - match['M']['T'] = -1; - match['M']['W'] = -2; - match['M']['Y'] = 0; - match['M']['V'] = 1; - match['M']['B'] = -2; - match['M']['J'] = 2; - match['M']['Z'] = -1; - match['M']['X'] = -1; - match['M']['*'] = -5; - - match['F']['A'] = -2; - match['F']['R'] = -2; - match['F']['N'] = -2; - match['F']['D'] = -4; - match['F']['C'] = -2; - match['F']['Q'] = -4; - match['F']['E'] = -3; - match['F']['G'] = -3; - match['F']['H'] = -2; - match['F']['I'] = 0; - match['F']['L'] = 1; - match['F']['K'] = -3; - match['F']['M'] = 0; - match['F']['F'] = 8; - match['F']['P'] = -3; - match['F']['S'] = -2; - match['F']['T'] = -1; - match['F']['W'] = 1; - match['F']['Y'] = 3; - match['F']['V'] = 0; - match['F']['B'] = -3; - match['F']['J'] = 1; - match['F']['Z'] = -3; - match['F']['X'] = -1; - match['F']['*'] = -5; - - match['P']['A'] = -1; - match['P']['R'] = -2; - match['P']['N'] = -2 ; - match['P']['D'] = -1; - match['P']['C'] = -4 ; - match['P']['Q'] = -1; - match['P']['E'] = 0; - match['P']['G'] = -2; - match['P']['H'] = -2; - match['P']['I'] = -2; - match['P']['L'] = -3; - match['P']['K'] = -1; - match['P']['M'] = -2; - match['P']['F'] = -3; - match['P']['P'] = 9; - match['P']['S'] = -1; - match['P']['T'] = -1; - match['P']['W'] = -3; - match['P']['Y'] = -3; - match['P']['V'] = -3; - match['P']['B'] 
= -2; - match['P']['J'] = -3; - match['P']['Z'] = -1; - match['P']['X'] = -1; - match['P']['*'] = -5; - - match['S']['A'] = 1; - match['S']['R'] = -1; - match['S']['N'] = 1; - match['S']['D'] = 0; - match['S']['C'] = -1; - match['S']['Q'] = 0; - match['S']['E'] = 0; - match['S']['G'] = 0; - match['S']['H'] = -1; - match['S']['I'] = -2; - match['S']['L'] = -3; - match['S']['K'] = -1; - match['S']['M'] = -2; - match['S']['F'] = -2 ; - match['S']['P'] = -1; - match['S']['S'] = 4; - match['S']['T'] = 2; - match['S']['W'] = -4; - match['S']['Y'] = -2; - match['S']['V'] = -1; - match['S']['B'] = 0; - match['S']['J'] = -2; - match['S']['Z'] = 0; - match['S']['X'] = -1; - match['S']['*'] = -5; - - match['T']['A'] = 0; - match['T']['R'] = -1; - match['T']['N'] = 0; - match['T']['D'] = -1; - match['T']['C'] = -1; - match['T']['Q'] = -1; - match['T']['E'] = -1; - match['T']['G'] = -2; - match['T']['H'] = -2 ; - match['T']['I'] = -1; - match['T']['L'] = -1; - match['T']['K'] = -1; - match['T']['M'] = -1; - match['T']['F'] = -1; - match['T']['P'] = -1; - match['T']['S'] = 2; - match['T']['T'] = 5 ; - match['T']['W'] = -3; - match['T']['Y'] = -1; - match['T']['V'] = 0; - match['T']['B'] = 0; - match['T']['J'] = -1; - match['T']['Z'] = -1; - match['T']['X'] = -1; - match['T']['*'] = -5; - - match['W']['A'] = -2; - match['W']['R'] = -2; - match['W']['N'] = -4; - match['W']['D'] = -4; - match['W']['C'] = -5; - match['W']['Q'] = -2; - match['W']['E'] = -3; - match['W']['G'] = -2; - match['W']['H'] = -3; - match['W']['I'] = -2; - match['W']['L'] = -2; - match['W']['K'] = -2; - match['W']['M'] = -2; - match['W']['F'] = 1; - match['W']['P'] =-3 ; - match['W']['S'] = -4; - match['W']['T'] = -3; - match['W']['W'] = 15; - match['W']['Y'] = 3; - match['W']['V'] = -3; - match['W']['B'] = -4; - match['W']['J'] = -2; - match['W']['Z'] = -2; - match['W']['X'] = -1; - match['W']['*'] = -5; - - match['Y']['A'] = -2; - match['Y']['R'] = -1; - match['Y']['N'] = -2; - match['Y']['D'] = -2; - 
match['Y']['C'] = -3; - match['Y']['Q'] = -1; - match['Y']['E'] = -2; - match['Y']['G'] = -3; - match['Y']['H'] = 2; - match['Y']['I'] = 0; - match['Y']['L'] = 0; - match['Y']['K'] = -1; - match['Y']['M'] = 0; - match['Y']['F'] = 3; - match['Y']['P'] = -3; - match['Y']['S'] = -2; - match['Y']['T'] = -1; - match['Y']['W'] = 3; - match['Y']['Y'] = 8; - match['Y']['V'] = -1; - match['Y']['B'] = -2; - match['Y']['J'] = 0; - match['Y']['Z'] = -2; - match['Y']['X'] = -1; - match['Y']['*'] = -5; - - match['V']['A'] = 0; - match['V']['R'] = -2; - match['V']['N'] = -3; - match['V']['D'] = -3; - match['V']['C'] = -1; - match['V']['Q'] = -3; - match['V']['E'] = -3; - match['V']['G'] = -3; - match['V']['H'] = -3; - match['V']['I'] = 3; - match['V']['L'] = 1; - match['V']['K'] = -2; - match['V']['M'] = 1; - match['V']['F'] = 0; - match['V']['P'] = -3; - match['V']['S'] = -1; - match['V']['T'] = 0; - match['V']['W'] = -3; - match['V']['Y'] = -1; - match['V']['V'] = 5; - match['V']['B'] = -3; - match['V']['J'] = 2; - match['V']['Z'] = -3; - match['V']['X'] = -1; - match['V']['*'] = -5; - - match['B']['A'] = -1; - match['B']['R'] = -1; - match['B']['N'] = 5; - match['B']['D'] = 6; - match['B']['C'] = -2; - match['B']['Q'] = 0; - match['B']['E'] = 1; - match['B']['G'] = -1; - match['B']['H'] = 0; - match['B']['I'] = -3; - match['B']['L'] = -3; - match['B']['K'] = 0; - match['B']['M'] = -2; - match['B']['F'] = -3; - match['B']['P'] = -2; - match['B']['S'] = 0; - match['B']['T'] = 0; - match['B']['W'] = -4; - match['B']['Y'] = -2; - match['B']['V'] = -3; - match['B']['B'] = 5; - match['B']['J'] = -3; - match['B']['Z'] = 1; - match['B']['X'] = -1; - match['B']['*'] = -5; - - match['J']['A'] = -1; - match['J']['R'] = -3; - match['J']['N'] = -3; - match['J']['D'] = -3; - match['J']['C'] = -2; - match['J']['Q'] = -2; - match['J']['E'] = -3; - match['J']['G'] = -4; - match['J']['H'] = -2; - match['J']['I'] = 4; - match['J']['L'] = 4; - match['J']['K'] = -3; - match['J']['M'] = 2; - 
match['J']['F'] = 1 ; - match['J']['P'] = -3; - match['J']['S'] = -2; - match['J']['T'] = -1; - match['J']['W'] = -2; - match['J']['Y'] = 0; - match['J']['V'] = 2 ; - match['J']['B'] = -3 ; - match['J']['J'] = 4; - match['J']['Z'] = -2; - match['J']['X'] = -1; - match['J']['*'] = -5; - - match['Z']['A'] = -1; - match['Z']['R'] = 1; - match['Z']['N'] = 0; - match['Z']['D'] = 1; - match['Z']['C'] = -3; - match['Z']['Q'] = 4; - match['Z']['E'] = 5; - match['Z']['G'] = -2; - match['Z']['H'] = 0; - match['Z']['I'] = -3; - match['Z']['L'] = -2; - match['Z']['K'] = 1; - match['Z']['M'] = -1; - match['Z']['F'] = -3; - match['Z']['P'] = -1; - match['Z']['S'] = 0; - match['Z']['T'] = -1; - match['Z']['W'] = -2 ; - match['Z']['Y'] = -2; - match['Z']['V'] = -3; - match['Z']['B'] = 1; - match['Z']['J'] = -2; - match['Z']['Z'] = 5; - match['Z']['X'] = -1; - match['Z']['*'] = -5; - - match['X']['A'] = -1; - match['X']['R'] = -1; - match['X']['N'] = -1; - match['X']['D'] = -1; - match['X']['C'] = -1; - match['X']['Q'] = -1; - match['X']['E'] = -1; - match['X']['G'] = -1; - match['X']['H'] = -1; - match['X']['I'] = -1; - match['X']['L'] = -1; - match['X']['K'] = -1; - match['X']['M'] = -1; - match['X']['F'] = -1; - match['X']['P'] = -1; - match['X']['S'] = -1; - match['X']['T'] = -1 ; - match['X']['W'] = -1; - match['X']['Y'] = -1; - match['X']['V'] = -1; - match['X']['B'] = -1; - match['X']['J'] = -1; - match['X']['Z'] = -1; - match['X']['X'] = -1; - match['X']['*'] = -5; - - match['*']['A'] = -5; - match['*']['R'] = -5; - match['*']['N'] = -5; - match['*']['D'] = -5; - match['*']['C'] = -5; - match['*']['Q'] = -5; - match['*']['E'] = -5; - match['*']['G'] = -5; - match['*']['H'] = -5; - match['*']['I'] = -5; - match['*']['L'] = -5; - match['*']['K'] = -5; - match['*']['M'] = -5; - match['*']['F'] = -5; - match['*']['P'] = -5; - match['*']['S'] = -5; - match['*']['T'] = -5; - match['*']['W'] = -5; - match['*']['Y'] = -5; - match['*']['V'] = -5; - match['*']['B'] = -5; - 
match['*']['J'] = -5; - match['*']['Z'] = -5; - match['*']['X'] = -5; - match['*']['*'] = 1; - } - } /** * Fills the the similarity and direction matrixes of the two input sequences. diff --git a/src/main/java/nl/wur/bif/pantools/alignment/LocalSequenceAlignment.java b/src/main/java/nl/wur/bif/pantools/alignment/LocalSequenceAlignment.java index b0925a587..ef0485f47 100755 --- a/src/main/java/nl/wur/bif/pantools/alignment/LocalSequenceAlignment.java +++ b/src/main/java/nl/wur/bif/pantools/alignment/LocalSequenceAlignment.java @@ -1,5 +1,7 @@ package nl.wur.bif.pantools.alignment; +import nl.wur.bif.pantools.utils.FileUtils; + import java.util.Stack; /** @@ -30,7 +32,7 @@ public class LocalSequenceAlignment { private int insertions; private int similarity; private double identity; - private char TYPE; + private String TYPE; private int offset; private int range_len; private int[] score_array; @@ -47,7 +49,7 @@ public class LocalSequenceAlignment { * @param clip The stringency of soft-clipping in the range [0..3] * @param type Type of the input sequences(N for nucleotide P for peptide). */ - public LocalSequenceAlignment(int gap_open, int gap_ext, int max_length, int clip, char type) { + public LocalSequenceAlignment(int gap_open, int gap_ext, int max_length, int clip, String type) { int i, j; seq1 = new StringBuilder(); seq2 = new StringBuilder(); @@ -79,13 +81,8 @@ public class LocalSequenceAlignment { left[0][j] = -1000; matrix[0][j] = 0; direction[0][j] = 'D'; - } - if (TYPE == 'N') - initialize_NUCC_matrix(); - else if (TYPE == 'P') - initialize_BLOSUM_matrix(); - else - System.out.println("Aligner type should be N or P"); + } + match = FileUtils.loadScoringMatrix(TYPE); switch (CLIPPING_STRIGENCY) { case 1: mismatch_penalty = -1; @@ -101,869 +98,6 @@ public class LocalSequenceAlignment { break; } } - - /** - * Initializes the NUCC.1 scoring matrix. 
- */ - public final void initialize_NUCC_matrix() { - match = new int[256][256]; - - match['A']['A'] = 5; - match['A']['T'] = -4; - match['A']['G'] = -4; - match['A']['C'] = -4; - match['A']['S'] = -4; - match['A']['W'] = 1; - match['A']['R'] = 1; - match['A']['Y'] = -4; - match['A']['K'] = -4; - match['A']['M'] = 1; - match['A']['B'] = -4; - match['A']['V'] = -1; - match['A']['H'] = -1; - match['A']['D'] = -1; - match['A']['N'] = -2; - match['T']['A'] = -4; - match['T']['T'] = 5; - match['T']['G'] = -4; - match['T']['C'] = -4; - match['T']['S'] = -4; - match['T']['W'] = 1; - match['T']['R'] = -4; - match['T']['Y'] = 1; - match['T']['K'] = 1; - match['T']['M'] = -4; - match['T']['B'] = -1; - match['T']['V'] = -4; - match['T']['H'] = -1; - match['T']['D'] = -1; - match['T']['N'] = -2; - match['G']['A'] = -4; - match['G']['T'] = -4; - match['G']['G'] = 5; - match['G']['C'] = -4; - match['G']['S'] = 1; - match['G']['W'] = -4; - match['G']['R'] = 1; - match['G']['Y'] = -4; - match['G']['K'] = 1; - match['G']['M'] = -4; - match['G']['B'] = -1; - match['G']['V'] = -1; - match['G']['H'] = -4; - match['G']['D'] = -1; - match['G']['N'] = -2; - match['C']['A'] = -4; - match['C']['T'] = -4; - match['C']['G'] = -4; - match['C']['C'] = 5; - match['C']['S'] = 1; - match['C']['W'] = -4; - match['C']['R'] = -4; - match['C']['Y'] = 1; - match['C']['K'] = -4; - match['C']['M'] = 1; - match['C']['B'] = -1; - match['C']['V'] = -1; - match['C']['H'] = -1; - match['C']['D'] = -4; - match['C']['N'] = -2; - match['S']['A'] = -4; - match['S']['T'] = -4; - match['S']['G'] = 1; - match['S']['C'] = 1; - match['S']['S'] = -1; - match['S']['W'] = -4; - match['S']['R'] = -2; - match['S']['Y'] = -2; - match['S']['K'] = -2; - match['S']['M'] = -2; - match['S']['B'] = -1; - match['S']['V'] = -1; - match['S']['H'] = -3; - match['S']['D'] = -3; - match['S']['N'] = -1; - match['W']['A'] = 1; - match['W']['T'] = 1; - match['W']['G'] = -4; - match['W']['C'] = -4; - match['W']['S'] = -4; - 
match['W']['W'] = -1; - match['W']['R'] = -2; - match['W']['Y'] = -2; - match['W']['K'] = -2; - match['W']['M'] = -2; - match['W']['B'] = -3; - match['W']['V'] = -3; - match['W']['H'] = -1; - match['W']['D'] = -1; - match['W']['N'] = -1; - match['R']['A'] = 1; - match['R']['T'] = -4; - match['R']['G'] = 1; - match['R']['C'] = -4; - match['R']['S'] = -2; - match['R']['W'] = -2; - match['R']['R'] = -1; - match['R']['Y'] = -4; - match['R']['K'] = -2; - match['R']['M'] = -2; - match['R']['B'] = -3; - match['R']['V'] = -1; - match['R']['H'] = -3; - match['R']['D'] = -1; - match['R']['N'] = -1; - match['Y']['A'] = -4; - match['Y']['T'] = 1; - match['Y']['G'] = -4; - match['Y']['C'] = 1; - match['Y']['S'] = -2; - match['Y']['W'] = -2; - match['Y']['R'] = -4; - match['Y']['Y'] = -1; - match['Y']['K'] = -2; - match['Y']['M'] = -2; - match['Y']['B'] = -1; - match['Y']['V'] = -3; - match['Y']['H'] = -1; - match['Y']['D'] = -3; - match['Y']['N'] = -1; - match['K']['A'] = -4; - match['K']['T'] = 1; - match['K']['G'] = 1; - match['K']['C'] = -4; - match['K']['S'] = -2; - match['K']['W'] = -2; - match['K']['R'] = -2; - match['K']['Y'] = -2; - match['K']['K'] = -1; - match['K']['M'] = -4; - match['K']['B'] = -1; - match['K']['V'] = -3; - match['K']['H'] = -3; - match['K']['D'] = -1; - match['K']['N'] = -1; - match['M']['A'] = 1; - match['M']['T'] = -4; - match['M']['G'] = -4; - match['M']['C'] = 1; - match['M']['S'] = -2; - match['M']['W'] = -2; - match['M']['R'] = -2; - match['M']['Y'] = -2; - match['M']['K'] = -4; - match['M']['M'] = -1; - match['M']['B'] = -3; - match['M']['V'] = -1; - match['M']['H'] = -1; - match['M']['D'] = -3; - match['M']['N'] = -1; - match['B']['A'] = -4; - match['B']['T'] = -1; - match['B']['G'] = -1; - match['B']['C'] = -1; - match['B']['S'] = -1; - match['B']['W'] = -3; - match['B']['R'] = -3; - match['B']['Y'] = -1; - match['B']['K'] = -1; - match['B']['M'] = -3; - match['B']['B'] = -1; - match['B']['V'] = -2; - match['B']['H'] = -2; - match['B']['D'] 
= -2; - match['B']['N'] = -1; - match['V']['A'] = -1; - match['V']['T'] = -4; - match['V']['G'] = -1; - match['V']['C'] = -1; - match['V']['S'] = -1; - match['V']['W'] = -3; - match['V']['R'] = -1; - match['V']['Y'] = -3; - match['V']['K'] = -3; - match['V']['M'] = -1; - match['V']['B'] = -2; - match['V']['V'] = -1; - match['V']['H'] = -2; - match['V']['D'] = -2; - match['V']['N'] = -1; - match['H']['A'] = -1; - match['H']['T'] = -1; - match['H']['G'] = -4; - match['H']['C'] = -1; - match['H']['S'] = -3; - match['H']['W'] = -1; - match['H']['R'] = -3; - match['H']['Y'] = -1; - match['H']['K'] = -3; - match['H']['M'] = -1; - match['H']['B'] = -2; - match['H']['V'] = -2; - match['H']['H'] = -1; - match['H']['D'] = -2; - match['H']['N'] = -1; - match['D']['A'] = -1; - match['D']['T'] = -1; - match['D']['G'] = -1; - match['D']['C'] = -4; - match['D']['S'] = -3; - match['D']['W'] = -1; - match['D']['R'] = -1; - match['D']['Y'] = -3; - match['D']['K'] = -1; - match['D']['M'] = -3; - match['D']['B'] = -2; - match['D']['V'] = -2; - match['D']['H'] = -2; - match['D']['D'] = -1; - match['D']['N'] = -1; - match['N']['A'] = -2; - match['N']['T'] = -2; - match['N']['G'] = -2; - match['N']['C'] = -2; - match['N']['S'] = -1; - match['N']['W'] = -1; - match['N']['R'] = -1; - match['N']['Y'] = -1; - match['N']['K'] = -1; - match['N']['M'] = -1; - match['N']['B'] = -1; - match['N']['V'] = -1; - match['N']['H'] = -1; - match['N']['D'] = -1; - match['N']['N'] = -1; - - } - - /** - * Initializes the BLOSUM62 scoring matrix. 
- */ - public final void initialize_BLOSUM_matrix() { - match = new int[256][256]; - match['A']['A'] = 4; - match['A']['R'] = -1; - match['A']['N'] = -2; - match['A']['D'] = -2; - match['A']['C'] = 0; - match['A']['Q'] = -1; - match['A']['E'] = -1; - match['A']['G'] = 0; - match['A']['H'] = -2; - match['A']['I'] = -1; - match['A']['L'] = -1; - match['A']['K'] = -1; - match['A']['M'] = -1; - match['A']['F'] = -2; - match['A']['P'] = -1; - match['A']['S'] = 1; - match['A']['T'] = 0; - match['A']['W'] = -3; - match['A']['Y'] = -2; - match['A']['V'] = 0; - match['A']['B'] = -2; - match['A']['Z'] = -1; - match['A']['X'] = 0; - match['A']['*'] = -4; - - - match['R']['A'] = -1; - match['R']['R'] = 5; - match['R']['N'] = 0; - match['R']['D'] = -2; - match['R']['C'] = -3; - match['R']['Q'] = 1; - match['R']['E'] = 0; - match['R']['G'] = -2; - match['R']['H'] = 0; - match['R']['I'] = -3; - match['R']['L'] = -2; - match['R']['K'] = 2; - match['R']['M'] = -1; - match['R']['F'] = -3; - match['R']['P'] = -2; - match['R']['S'] = -1; - match['R']['T'] = -1; - match['R']['W'] = -3; - match['R']['Y'] = -2; - match['R']['V'] = -3; - match['R']['B'] = -1; - match['R']['Z'] = 0; - match['R']['X'] = -1; - match['R']['*'] = -4; - - - match['N']['A'] = -2; - match['N']['R'] = 0; - match['N']['N'] = 6; - match['N']['D'] = 1; - match['N']['C'] = -3; - match['N']['Q'] = 0; - match['N']['E'] = 0; - match['N']['G'] = 0; - match['N']['H'] = 1; - match['N']['I'] = -3; - match['N']['L'] = -3; - match['N']['K'] = 0; - match['N']['M'] = -2; - match['N']['F'] = -3; - match['N']['P'] = -2; - match['N']['S'] = 1; - match['N']['T'] = 0; - match['N']['W'] = -4; - match['N']['Y'] = -2; - match['N']['V'] = -3; - match['N']['B'] = 3; - match['N']['Z'] = 0; - match['N']['X'] = -1; - match['N']['*'] = -4; - - - match['D']['A'] = -2; - match['D']['R'] = -2; - match['D']['N'] = 1; - match['D']['D'] = 6; - match['D']['C'] = -3; - match['D']['Q'] = 0; - match['D']['E'] = 2; - match['D']['G'] = -1; - 
match['D']['H'] = -1; - match['D']['I'] = -3; - match['D']['L'] = -4; - match['D']['K'] = -1; - match['D']['M'] = -3; - match['D']['F'] = -3; - match['D']['P'] = -1; - match['D']['S'] = 0; - match['D']['T'] = -1; - match['D']['W'] = -4; - match['D']['Y'] = -3; - match['D']['V'] = -3; - match['D']['B'] = 4; - match['D']['Z'] = 1; - match['D']['X'] = -1; - match['D']['*'] = -4; - - - match['C']['A'] = 0; - match['C']['R'] = -3; - match['C']['N'] = -3; - match['C']['D'] = -3; - match['C']['C'] = 9; - match['C']['Q'] = -3; - match['C']['E'] = -4; - match['C']['G'] = -3; - match['C']['H'] = -3; - match['C']['I'] = -1; - match['C']['L'] = -1; - match['C']['K'] = -3; - match['C']['M'] = -1; - match['C']['F'] = -2; - match['C']['P'] = -3; - match['C']['S'] = -1; - match['C']['T'] = -1; - match['C']['W'] = -2; - match['C']['Y'] = -2; - match['C']['V'] = -1; - match['C']['B'] = -3; - match['C']['Z'] = -3; - match['C']['X'] = -2; - match['C']['*'] = -4; - - - match['Q']['A'] = -1; - match['Q']['R'] = 1; - match['Q']['N'] = 0; - match['Q']['D'] = 0; - match['Q']['C'] = -3; - match['Q']['Q'] = 5; - match['Q']['E'] = 2; - match['Q']['G'] = -2; - match['Q']['H'] = 0; - match['Q']['I'] = -3; - match['Q']['L'] = -2; - match['Q']['K'] = 1; - match['Q']['M'] = 0; - match['Q']['F'] = -3; - match['Q']['P'] = -1; - match['Q']['S'] = 0; - match['Q']['T'] = -1; - match['Q']['W'] = -2; - match['Q']['Y'] = -1; - match['Q']['V'] = -2; - match['Q']['B'] = 0; - match['Q']['Z'] = 3; - match['Q']['X'] = -1; - match['Q']['*'] = -4; - - - match['E']['A'] = -1; - match['E']['R'] = 0; - match['E']['N'] = 0; - match['E']['D'] = 2; - match['E']['C'] = -4; - match['E']['Q'] = 2; - match['E']['E'] = 5; - match['E']['G'] = -2; - match['E']['H'] = 0; - match['E']['I'] = -3; - match['E']['L'] = -3; - match['E']['K'] = 1; - match['E']['M'] = -2; - match['E']['F'] = -3; - match['E']['P'] = -1; - match['E']['S'] = 0; - match['E']['T'] = -1; - match['E']['W'] = -3; - match['E']['Y'] = -2; - match['E']['V'] = 
-2; - match['E']['B'] = 1; - match['E']['Z'] = 4; - match['E']['X'] = -1; - match['E']['*'] = -4; - - - match['G']['A'] = 0; - match['G']['R'] = -2; - match['G']['N'] = 0; - match['G']['D'] = -1; - match['G']['C'] = -3; - match['G']['Q'] = -2; - match['G']['E'] = -2; - match['G']['G'] = 6; - match['G']['H'] = -2; - match['G']['I'] = -4; - match['G']['L'] = -4; - match['G']['K'] = -2; - match['G']['M'] = -3; - match['G']['F'] = -3; - match['G']['P'] = -2; - match['G']['S'] = 0; - match['G']['T'] = -2; - match['G']['W'] = -2; - match['G']['Y'] = -3; - match['G']['V'] = -3; - match['G']['B'] = -1; - match['G']['Z'] = -2; - match['G']['X'] = -1; - match['G']['*'] = -4; - - - match['H']['A'] = -2; - match['H']['R'] = 0; - match['H']['N'] = 1; - match['H']['D'] = -1; - match['H']['C'] = -3; - match['H']['Q'] = 0; - match['H']['E'] = 0; - match['H']['G'] = -2; - match['H']['H'] = 8; - match['H']['I'] = -3; - match['H']['L'] = -3; - match['H']['K'] = -1; - match['H']['M'] = -2; - match['H']['F'] = -1; - match['H']['P'] = -2; - match['H']['S'] = -1; - match['H']['T'] = -2; - match['H']['W'] = -2; - match['H']['Y'] = 2; - match['H']['V'] = -3; - match['H']['B'] = 0; - match['H']['Z'] = 0; - match['H']['X'] = -1; - match['H']['*'] = -4; - - - match['I']['A'] = -1; - match['I']['R'] = -3; - match['I']['N'] = -3; - match['I']['D'] = -3; - match['I']['C'] = -1; - match['I']['Q'] = -3; - match['I']['E'] = -3; - match['I']['G'] = -4; - match['I']['H'] = -3; - match['I']['I'] = 4; - match['I']['L'] = 2; - match['I']['K'] = -3; - match['I']['M'] = 1; - match['I']['F'] = 0; - match['I']['P'] = -3; - match['I']['S'] = -2; - match['I']['T'] = -1; - match['I']['W'] = -3; - match['I']['Y'] = -1; - match['I']['V'] = 3; - match['I']['B'] = -3; - match['I']['Z'] = -3; - match['I']['X'] = -1; - match['I']['*'] = -4; - - - match['L']['A'] = -1; - match['L']['R'] = -2; - match['L']['N'] = -3; - match['L']['D'] = -4; - match['L']['C'] = -1; - match['L']['Q'] = -2; - match['L']['E'] = -3; - 
match['L']['G'] = -4; - match['L']['H'] = -3; - match['L']['I'] = 2; - match['L']['L'] = 4; - match['L']['K'] = -2; - match['L']['M'] = 2; - match['L']['F'] = 0; - match['L']['P'] = -3; - match['L']['S'] = -2; - match['L']['T'] = -1; - match['L']['W'] = -2; - match['L']['Y'] = -1; - match['L']['V'] = 1; - match['L']['B'] = -4; - match['L']['Z'] = -3; - match['L']['X'] = -1; - match['L']['*'] = -4; - - - match['K']['A'] = -1; - match['K']['R'] = 2; - match['K']['N'] = 0; - match['K']['D'] = -1; - match['K']['C'] = -3; - match['K']['Q'] = 1; - match['K']['E'] = 1; - match['K']['G'] = -2; - match['K']['H'] = -1; - match['K']['I'] = -3; - match['K']['L'] = -2; - match['K']['K'] = 5; - match['K']['M'] = -1; - match['K']['F'] = -3; - match['K']['P'] = -1; - match['K']['S'] = 0; - match['K']['T'] = -1; - match['K']['W'] = -3; - match['K']['Y'] = -2; - match['K']['V'] = -2; - match['K']['B'] = 0; - match['K']['Z'] = 1; - match['K']['X'] = -1; - match['K']['*'] = -4; - - - match['M']['A'] = -1; - match['M']['R'] = -1; - match['M']['N'] = -2; - match['M']['D'] = -3; - match['M']['C'] = -1; - match['M']['Q'] = 0; - match['M']['E'] = -2; - match['M']['G'] = -3; - match['M']['H'] = -2; - match['M']['I'] = 1; - match['M']['L'] = 2; - match['M']['K'] = -1; - match['M']['M'] = 5; - match['M']['F'] = 0; - match['M']['P'] = -2; - match['M']['S'] = -1; - match['M']['T'] = -1; - match['M']['W'] = -1; - match['M']['Y'] = -1; - match['M']['V'] = 1; - match['M']['B'] = -3; - match['M']['Z'] = -1; - match['M']['X'] = -1; - match['M']['*'] = -4; - - - match['F']['A'] = -2; - match['F']['R'] = -3; - match['F']['N'] = -3; - match['F']['D'] = -3; - match['F']['C'] = -2; - match['F']['Q'] = -3; - match['F']['E'] = -3; - match['F']['G'] = -3; - match['F']['H'] = -1; - match['F']['I'] = 0; - match['F']['L'] = 0; - match['F']['K'] = -3; - match['F']['M'] = 0; - match['F']['F'] = 6; - match['F']['P'] = -4; - match['F']['S'] = -2; - match['F']['T'] = -2; - match['F']['W'] = 1; - match['F']['Y'] = 
3; - match['F']['V'] = -1; - match['F']['B'] = -3; - match['F']['Z'] = -3; - match['F']['X'] = -1; - match['F']['*'] = -4; - - - match['P']['A'] = -1; - match['P']['R'] = -2; - match['P']['N'] = -2; - match['P']['D'] = -1; - match['P']['C'] = -3; - match['P']['Q'] = -1; - match['P']['E'] = -1; - match['P']['G'] = -2; - match['P']['H'] = -2; - match['P']['I'] = -3; - match['P']['L'] = -3; - match['P']['K'] = -1; - match['P']['M'] = -2; - match['P']['F'] = -4; - match['P']['P'] = 7; - match['P']['S'] = -1; - match['P']['T'] = -1; - match['P']['W'] = -4; - match['P']['Y'] = -3; - match['P']['V'] = -2; - match['P']['B'] = -2; - match['P']['Z'] = -1; - match['P']['X'] = -2; - match['P']['*'] = -4; - - - match['S']['A'] = 1; - match['S']['R'] = -1; - match['S']['N'] = 1; - match['S']['D'] = 0; - match['S']['C'] = -1; - match['S']['Q'] = 0; - match['S']['E'] = 0; - match['S']['G'] = 0; - match['S']['H'] = -1; - match['S']['I'] = -2; - match['S']['L'] = -2; - match['S']['K'] = 0; - match['S']['M'] = -1; - match['S']['F'] = -2; - match['S']['P'] = -1; - match['S']['S'] = 4; - match['S']['T'] = 1; - match['S']['W'] = -3; - match['S']['Y'] = -2; - match['S']['V'] = -2; - match['S']['B'] = 0; - match['S']['Z'] = 0; - match['S']['X'] = 0; - match['S']['*'] = -4; - - - match['T']['A'] = 0; - match['T']['R'] = -1; - match['T']['N'] = 0; - match['T']['D'] = -1; - match['T']['C'] = -1; - match['T']['Q'] = -1; - match['T']['E'] = -1; - match['T']['G'] = -2; - match['T']['H'] = -2; - match['T']['I'] = -1; - match['T']['L'] = -1; - match['T']['K'] = -1; - match['T']['M'] = -1; - match['T']['F'] = -2; - match['T']['P'] = -1; - match['T']['S'] = 1; - match['T']['T'] = 5; - match['T']['W'] = -2; - match['T']['Y'] = -2; - match['T']['V'] = 0; - match['T']['B'] = -1; - match['T']['Z'] = -1; - match['T']['X'] = 0; - match['T']['*'] = -4; - - - match['W']['A'] = -3; - match['W']['R'] = -3; - match['W']['N'] = -4; - match['W']['D'] = -4; - match['W']['C'] = -2; - match['W']['Q'] = -2; - 
match['W']['E'] = -3; - match['W']['G'] = -2; - match['W']['H'] = -2; - match['W']['I'] = -3; - match['W']['L'] = -2; - match['W']['K'] = -3; - match['W']['M'] = -1; - match['W']['F'] = 1; - match['W']['P'] = -4; - match['W']['S'] = -3; - match['W']['T'] = -2; - match['W']['W'] = 11; - match['W']['Y'] = 2; - match['W']['V'] = -3; - match['W']['B'] = -4; - match['W']['Z'] = -3; - match['W']['X'] = -2; - match['W']['*'] = -4; - - - match['Y']['A'] = -2; - match['Y']['R'] = -2; - match['Y']['N'] = -2; - match['Y']['D'] = -3; - match['Y']['C'] = -2; - match['Y']['Q'] = -1; - match['Y']['E'] = -2; - match['Y']['G'] = -3; - match['Y']['H'] = 2; - match['Y']['I'] = -1; - match['Y']['L'] = -1; - match['Y']['K'] = -2; - match['Y']['M'] = -1; - match['Y']['F'] = 3; - match['Y']['P'] = -3; - match['Y']['S'] = -2; - match['Y']['T'] = -2; - match['Y']['W'] = 2; - match['Y']['Y'] = 7; - match['Y']['V'] = -1; - match['Y']['B'] = -3; - match['Y']['Z'] = -2; - match['Y']['X'] = -1; - match['Y']['*'] = -4; - - - match['V']['A'] = 0; - match['V']['R'] = -3; - match['V']['N'] = -3; - match['V']['D'] = -3; - match['V']['C'] = -1; - match['V']['Q'] = -2; - match['V']['E'] = -2; - match['V']['G'] = -3; - match['V']['H'] = -3; - match['V']['I'] = 3; - match['V']['L'] = 1; - match['V']['K'] = -2; - match['V']['M'] = 1; - match['V']['F'] = -1; - match['V']['P'] = -2; - match['V']['S'] = -2; - match['V']['T'] = 0; - match['V']['W'] = -3; - match['V']['Y'] = -1; - match['V']['V'] = 4; - match['V']['B'] = -3; - match['V']['Z'] = -2; - match['V']['X'] = -1; - match['V']['*'] = -4; - - - match['B']['A'] = -2; - match['B']['R'] = -1; - match['B']['N'] = 3; - match['B']['D'] = 4; - match['B']['C'] = -3; - match['B']['Q'] = 0; - match['B']['E'] = 1; - match['B']['G'] = -1; - match['B']['H'] = 0; - match['B']['I'] = -3; - match['B']['L'] = -4; - match['B']['K'] = 0; - match['B']['M'] = -3; - match['B']['F'] = -3; - match['B']['P'] = -2; - match['B']['S'] = 0; - match['B']['T'] = -1; - 
match['B']['W'] = -4; - match['B']['Y'] = -3; - match['B']['V'] = -3; - match['B']['B'] = 4; - match['B']['Z'] = 1; - match['B']['X'] = -1; - match['B']['*'] = -4; - - - match['Z']['A'] = -1; - match['Z']['R'] = 0; - match['Z']['N'] = 0; - match['Z']['D'] = 1; - match['Z']['C'] = -3; - match['Z']['Q'] = 3; - match['Z']['E'] = 4; - match['Z']['G'] = -2; - match['Z']['H'] = 0; - match['Z']['I'] = -3; - match['Z']['L'] = -3; - match['Z']['K'] = 1; - match['Z']['M'] = -1; - match['Z']['F'] = -3; - match['Z']['P'] = -1; - match['Z']['S'] = 0; - match['Z']['T'] = -1; - match['Z']['W'] = -3; - match['Z']['Y'] = -2; - match['Z']['V'] = -2; - match['Z']['B'] = 1; - match['Z']['Z'] = 4; - match['Z']['X'] = -1; - match['Z']['*'] = -4; - - - match['X']['A'] = 0; - match['X']['R'] = -1; - match['X']['N'] = -1; - match['X']['D'] = -1; - match['X']['C'] = -2; - match['X']['Q'] = -1; - match['X']['E'] = -1; - match['X']['G'] = -1; - match['X']['H'] = -1; - match['X']['I'] = -1; - match['X']['L'] = -1; - match['X']['K'] = -1; - match['X']['M'] = -1; - match['X']['F'] = -1; - match['X']['P'] = -2; - match['X']['S'] = 0; - match['X']['T'] = 0; - match['X']['W'] = -2; - match['X']['Y'] = -1; - match['X']['V'] = -1; - match['X']['B'] = -1; - match['X']['Z'] = -1; - match['X']['X'] = -1; - match['X']['*'] = -4; - - - match['*']['A'] = -4; - match['*']['R'] = -4; - match['*']['N'] = -4; - match['*']['D'] = -4; - match['*']['C'] = -4; - match['*']['Q'] = -4; - match['*']['E'] = -4; - match['*']['G'] = -4; - match['*']['H'] = -4; - match['*']['I'] = -4; - match['*']['L'] = -4; - match['*']['K'] = -4; - match['*']['M'] = -4; - match['*']['F'] = -4; - match['*']['P'] = -4; - match['*']['S'] = -4; - match['*']['T'] = -4; - match['*']['W'] = -4; - match['*']['Y'] = -4; - match['*']['V'] = -4; - match['*']['B'] = -4; - match['*']['Z'] = -4; - match['*']['X'] = -4; - match['*']['*'] = 1; - } /** * Fills the the similarity and direction matrixes of the two input sequences. 
diff --git a/src/main/java/nl/wur/bif/pantools/cli/AddFunctions.java b/src/main/java/nl/wur/bif/pantools/cli/AddFunctions.java index cc323a0e9..8731f7075 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/AddFunctions.java +++ b/src/main/java/nl/wur/bif/pantools/cli/AddFunctions.java @@ -28,11 +28,11 @@ public class AddFunctions implements Callable<Integer> { private Pantools pantools; @Parameters(descriptionKey = "functions-file", index = "0+") - @InputFile(message = "file.functions") + @InputFile(message = "{file.functions}") Path functionsFile; @Option(names = {"-A", "--annotations-file"}) - @InputFile(message = "{file.functions}") + @InputFile(message = "{file.annotations}") Path annotationsFile; @Option(names = "--function") diff --git a/src/main/java/nl/wur/bif/pantools/cli/ConsensusTree.java b/src/main/java/nl/wur/bif/pantools/cli/ConsensusTree.java index 48bf4217b..bcf5a7abf 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/ConsensusTree.java +++ b/src/main/java/nl/wur/bif/pantools/cli/ConsensusTree.java @@ -3,6 +3,8 @@ package nl.wur.bif.pantools.cli; import nl.wur.bif.pantools.cli.mixins.SelectHmGroups; import nl.wur.bif.pantools.cli.mixins.ThreadNumber; import nl.wur.bif.pantools.cli.validation.BeanValidation; +import nl.wur.bif.pantools.cli.validation.Constraints; +import nl.wur.bif.pantools.cli.validation.Constraints.MatchInteger; import nl.wur.bif.pantools.pantools.Pantools; import picocli.CommandLine.Model.CommandSpec; @@ -32,6 +34,10 @@ public class ConsensusTree implements Callable<Integer> { @Option(names = {"-l", "--log"}, hidden = true) boolean log; + @Option(names = "--blosum") + @MatchInteger(value = {45, 50, 62, 80, 90}, message = "match.blosum") + int blosum; + @Option(names = {"--polytomies"}) boolean polytomies; @@ -47,6 +53,7 @@ public class ConsensusTree implements Callable<Integer> { private void setGlobalParameters() { THREADS = threadNumber.getnThreads(); + BLOSUM = blosum; LOG = log; ALLOW_POLYTOMIES = polytomies; } diff --git 
a/src/main/java/nl/wur/bif/pantools/cli/CorePhylogeny.java b/src/main/java/nl/wur/bif/pantools/cli/CorePhylogeny.java index 38e1a0367..ca0e66fe6 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/CorePhylogeny.java +++ b/src/main/java/nl/wur/bif/pantools/cli/CorePhylogeny.java @@ -5,6 +5,8 @@ import nl.wur.bif.pantools.cli.mixins.SelectGenomes; import nl.wur.bif.pantools.cli.mixins.SelectHmGroups; import nl.wur.bif.pantools.cli.mixins.ThreadNumber; import nl.wur.bif.pantools.cli.validation.BeanValidation; +import nl.wur.bif.pantools.cli.validation.Constraints; +import nl.wur.bif.pantools.cli.validation.Constraints.MatchInteger; import nl.wur.bif.pantools.pantools.Pantools; import picocli.CommandLine.Model.CommandSpec; @@ -40,9 +42,13 @@ public class CorePhylogeny implements Callable<Integer> { String phenotype; @Option(names = {"-m", "--clustering-mode"}) - @Pattern(regexp = "ML|NJ", flags = CASE_INSENSITIVE, message = "pattern.clustering-mode") + @Pattern(regexp = "ML|NJ", flags = CASE_INSENSITIVE, message = "{pattern.clustering-mode}") String mode; + @Option(names = "--blosum") + @MatchInteger(value = {45, 50, 62, 80, 90}, message = "match.blosum") + int blosum; + @Override public Integer call() throws IOException { pantools.createLogger(spec); @@ -56,6 +62,7 @@ public class CorePhylogeny implements Callable<Integer> { private void setGlobalParameters() { setGenomeSelectionOptions(selectGenomes); THREADS = threadNumber.getnThreads(); + BLOSUM = blosum; PHENOTYPE = phenotype; CLUSTERING_METHOD = mode; if (protein) Mode = "PROTEIN"; diff --git a/src/main/java/nl/wur/bif/pantools/cli/Group.java b/src/main/java/nl/wur/bif/pantools/cli/Group.java index e7686e535..a5b5a6476 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/Group.java +++ b/src/main/java/nl/wur/bif/pantools/cli/Group.java @@ -7,6 +7,7 @@ import jakarta.validation.constraints.Min; import nl.wur.bif.pantools.cli.mixins.SelectGenomes; import nl.wur.bif.pantools.cli.mixins.ThreadNumber; import 
nl.wur.bif.pantools.cli.validation.BeanValidation; +import nl.wur.bif.pantools.cli.validation.Constraints.ScoringMatrix; import nl.wur.bif.pantools.pantools.Pantools; import java.nio.file.Path; @@ -57,7 +58,11 @@ public class Group implements Callable<Integer> { @DecimalMax(value = "10", message = "{max.contrast}") static double contrast; - @ArgGroup(multiplicity = "1") RelaxationSettings relaxationSettings; + @Option(names = "--scoring-matrix") + @ScoringMatrix(message = "{scoring_matrix}") + String scoringMatrix; + + @ArgGroup RelaxationSettings relaxationSettings; private static class RelaxationSettings { @Option(names = {"--relaxation"}) void setParams(int value) { @@ -96,7 +101,7 @@ public class Group implements Callable<Integer> { pantools.setPangenomeGraph(); setGlobalParameters(); //TODO: use local parameters instead - proLayer.group(); + proLayer.group(scoringMatrix); return 0; } diff --git a/src/main/java/nl/wur/bif/pantools/cli/MSA.java b/src/main/java/nl/wur/bif/pantools/cli/MSA.java index 258eff7bb..3177ce05b 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/MSA.java +++ b/src/main/java/nl/wur/bif/pantools/cli/MSA.java @@ -69,7 +69,7 @@ public class MSA implements Callable<Integer> { String phenotype; @Option(names = "--blosum") - @MatchInteger(value = {45, 62, 80}, message = "match.blosum") + @MatchInteger(value = {45, 50, 62, 80, 90}, message = "match.blosum") int blosum; @Option(names = "--phenotype-threshold") @@ -108,6 +108,7 @@ public class MSA implements Callable<Integer> { if (regionsFile != null) PATH_TO_THE_REGIONS_FILE = regionsFile.toString(); if (functions != null) SELECTED_NAME = functions.toString().replaceAll("[\\[\\]]", ""); THREADS = threadNumber.getnThreads(); + BLOSUM = blosum; phenotype_threshold = phenotypeThreshold; Mode = (mode == null) ? 
"0" : mode.toUpperCase(); PHENOTYPE = phenotype; diff --git a/src/main/java/nl/wur/bif/pantools/cli/OptimalGrouping.java b/src/main/java/nl/wur/bif/pantools/cli/OptimalGrouping.java index 4db8aaa33..d567077bc 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/OptimalGrouping.java +++ b/src/main/java/nl/wur/bif/pantools/cli/OptimalGrouping.java @@ -59,6 +59,10 @@ public class OptimalGrouping implements Callable<Integer> { @Size(min = 1, message = "{size.empty.relaxation}") List<Integer> relaxation; + @Option(names = "--scoring-matrix") + @ScoringMatrix(message = "{scoring_matrix}") + String scoringMatrix; + @Override public Integer call() { pantools.createLogger(spec); @@ -67,7 +71,7 @@ public class OptimalGrouping implements Callable<Integer> { pantools.setPangenomeGraph(); setGlobalParameters(); //TODO: use local parameters instead - proLayer.optimal_grouping(); + proLayer.optimal_grouping(scoringMatrix); return 0; } diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/Constraints.java b/src/main/java/nl/wur/bif/pantools/cli/validation/Constraints.java index 3f2d5180c..e855e6b5a 100644 --- a/src/main/java/nl/wur/bif/pantools/cli/validation/Constraints.java +++ b/src/main/java/nl/wur/bif/pantools/cli/validation/Constraints.java @@ -84,6 +84,23 @@ public class Constraints { Class<? extends Payload>[] payload() default {}; } + /** + * Validate whether the scoring matrix exist in resources/scoring-matrices. + * Usage: @ScoringMatrix String matrixName + */ + @Documented + @Target({METHOD, FIELD, ANNOTATION_TYPE, CONSTRUCTOR, PARAMETER, TYPE_USE}) + @Retention(RUNTIME) + @Constraint(validatedBy = ScoringMatrixValidator.class) + public @interface ScoringMatrix { + + String message() default "Matrix does not exist: (${validatedValue})"; + + Class<?>[] groups() default {}; + + Class<? extends Payload>[] payload() default {}; + } + /** * Validate an optional integer that has a minimum, bus is 0 when unassigned. 
* Usage: @MinOrZero(value=x) int integer diff --git a/src/main/java/nl/wur/bif/pantools/cli/validation/validators/ScoringMatrixValidator.java b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/ScoringMatrixValidator.java new file mode 100644 index 000000000..bac019b44 --- /dev/null +++ b/src/main/java/nl/wur/bif/pantools/cli/validation/validators/ScoringMatrixValidator.java @@ -0,0 +1,28 @@ +package nl.wur.bif.pantools.cli.validation.validators; + +import jakarta.validation.ConstraintValidator; +import jakarta.validation.ConstraintValidatorContext; +import nl.wur.bif.pantools.cli.validation.Constraints.ScoringMatrix; + +import java.io.InputStream; +import java.nio.file.Paths; + +import static org.apache.logging.log4j.core.util.Loader.getClassLoader; + + +public class ScoringMatrixValidator implements ConstraintValidator<ScoringMatrix, String> { + + @Override + public void initialize(ScoringMatrix constraintAnnotation) { + } + + @Override + public boolean isValid(String matrixName, ConstraintValidatorContext context) { + if (matrixName == null) return true; + + // get matrix files from the resources/scoring-matrices directory + final String resourceLocation = Paths.get("scoring-matrices").resolve(matrixName).toString(); + final InputStream inputStream = getClassLoader().getResourceAsStream(resourceLocation); + return inputStream != null; + } +} diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java b/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java index cbb3227f2..abdf0fc6f 100644 --- a/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java +++ b/src/main/java/nl/wur/bif/pantools/pangenome/Classification.java @@ -6,66 +6,33 @@ package nl.wur.bif.pantools.pangenome; -import java.util.Arrays; -import java.util.ArrayList; -import java.util.List; -import java.util.HashMap; -import java.util.LinkedHashMap; - import cern.jet.math.Arithmetic; import nl.wur.bif.pantools.cli.RemoveFunctions; import 
nl.wur.bif.pantools.index.IndexDatabase; +import nl.wur.bif.pantools.index.IndexPointer; +import nl.wur.bif.pantools.index.IndexScanner; import nl.wur.bif.pantools.pantools.Pantools; import nl.wur.bif.pantools.sequence.SequenceDatabase; +import nl.wur.bif.pantools.sequence.SequenceScanner; +import nl.wur.bif.pantools.utils.FileUtils; import nl.wur.bif.pantools.utils.Utils; -import org.neo4j.graphdb.Label; -import java.util.Scanner; -import java.util.stream.Collectors; -import java.util.Map; import org.apache.commons.lang.ArrayUtils; -import nl.wur.bif.pantools.index.IndexPointer; -import org.neo4j.graphdb.Node; -import org.neo4j.graphdb.Relationship; -import org.neo4j.graphdb.RelationshipType; -import org.neo4j.graphdb.ResourceIterator; -import org.neo4j.graphdb.Transaction; -import org.neo4j.graphdb.Direction; -import static nl.wur.bif.pantools.alignment.BoundedLocalSequenceAlignment.match; -import static nl.wur.bif.pantools.alignment.BoundedLocalSequenceAlignment.initialize_BLOSUM_matrix; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import org.neo4j.graphdb.NotFoundException; - -import java.io.File; +import org.neo4j.graphdb.*; + +import java.io.*; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.text.DecimalFormat; -import java.util.HashSet; -import java.util.Set; -import org.neo4j.graphdb.ResourceIterable; -import nl.wur.bif.pantools.sequence.SequenceScanner; -import nl.wur.bif.pantools.index.IndexScanner; -import java.util.Collections; -import java.util.Random; -import java.util.TreeSet; +import java.util.*; +import java.util.concurrent.*; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.ConcurrentHashMap; - -import static nl.wur.bif.pantools.pangenome.create_skip_arrays.create_skip_arrays; - -import java.util.concurrent.BlockingQueue; 
-import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import static nl.wur.bif.pantools.alignment.BoundedLocalSequenceAlignment.match; import static nl.wur.bif.pantools.pangenome.FunctionalAnnotations.function_overview_per_group; - +import static nl.wur.bif.pantools.pangenome.create_skip_arrays.create_skip_arrays; import static nl.wur.bif.pantools.utils.Globals.*; import static nl.wur.bif.pantools.utils.Utils.*; @@ -9155,7 +9122,7 @@ public class Classification { public static void create_write_msa_trimmed_output(ArrayList<String> msaNames, String outdirMsa) { Pantools.logger.info("Reading alignments from {} for calculating variable/informative positions.", outdirMsa); ArrayList<String> nuc_or_prot_list = prepare_type_list_for_msa(); // list can contain "protein", "nucleotide" or both - initialize_BLOSUM(); + match = FileUtils.loadScoringMatrix("BLOSUM" + BLOSUM); delete_file_full_path(outdirMsa + "/groups_with_phenotype_specific_changes.txt"); for (String nuc_or_prot : nuc_or_prot_list) { @@ -9680,14 +9647,6 @@ public class Classification { System.exit(1); } } - - public static void initialize_BLOSUM() { - if (BLOSUM != 45 && BLOSUM != 62 && BLOSUM != 80) { - System.out.println("BLOSUM value must be 45, 62 or 80. 
Using BLOSUM62"); - BLOSUM = 62; - } - initialize_BLOSUM_matrix(BLOSUM); - } /** * @@ -9697,7 +9656,7 @@ public class Classification { public static void create_write_msa_no_trimming_output(ArrayList<String> msaNames, String outdirMsa) { Pantools.logger.info("Reading alignments from {} for calculating variable/informative positions.", outdirMsa); ArrayList<String> nuc_or_prot_list = prepare_type_list_for_msa(); // list can contain "protein", "nucleotide" or both - initialize_BLOSUM(); + match = FileUtils.loadScoringMatrix("BLOSUM" + BLOSUM); delete_file_full_path(outdirMsa + "/groups_with_phenotype_specific_changes.txt"); for (String nuc_or_prot : nuc_or_prot_list) { diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/GenomeLayer.java b/src/main/java/nl/wur/bif/pantools/pangenome/GenomeLayer.java index b9c4caa74..988f0a2d5 100755 --- a/src/main/java/nl/wur/bif/pantools/pangenome/GenomeLayer.java +++ b/src/main/java/nl/wur/bif/pantools/pangenome/GenomeLayer.java @@ -450,8 +450,8 @@ public class GenomeLayer { } pointer = new IndexPointer(); reference = new StringBuilder(); - bounded_aligner = new BoundedLocalSequenceAlignment(GAP_OPEN, GAP_EXT, MAX_ALIGNMENT_LENGTH, ALIGNMENT_BOUND, CLIPPING_STRINGENCY, 'N'); - aligner = new LocalSequenceAlignment(GAP_OPEN, GAP_EXT, MAX_ALIGNMENT_LENGTH, CLIPPING_STRINGENCY, 'N'); + bounded_aligner = new BoundedLocalSequenceAlignment(GAP_OPEN, GAP_EXT, MAX_ALIGNMENT_LENGTH, ALIGNMENT_BOUND, CLIPPING_STRINGENCY, "NUC.4.4"); + aligner = new LocalSequenceAlignment(GAP_OPEN, GAP_EXT, MAX_ALIGNMENT_LENGTH, CLIPPING_STRINGENCY, "NUC.4.4"); int_pair_comp = new IntPairComparator(); hit_counts = new ArrayList(); rand = new Random(1); diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/Phylogeny.java b/src/main/java/nl/wur/bif/pantools/pangenome/Phylogeny.java index 0a341c34a..4b789bc08 100644 --- a/src/main/java/nl/wur/bif/pantools/pangenome/Phylogeny.java +++ b/src/main/java/nl/wur/bif/pantools/pangenome/Phylogeny.java @@ -8,6 +8,7 
@@ package nl.wur.bif.pantools.pangenome; import nl.wur.bif.pantools.index.IndexScanner; import nl.wur.bif.pantools.pantools.Pantools; import nl.wur.bif.pantools.sequence.SequenceScanner; +import nl.wur.bif.pantools.utils.FileUtils; import nl.wur.bif.pantools.utils.Utils; import org.apache.commons.lang.ArrayUtils; import org.neo4j.graphdb.*; @@ -21,7 +22,7 @@ import java.util.concurrent.*; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; -import static nl.wur.bif.pantools.alignment.BoundedLocalSequenceAlignment.initialize_BLOSUM_matrix; +import static nl.wur.bif.pantools.alignment.BoundedLocalSequenceAlignment.match; import static nl.wur.bif.pantools.pangenome.Classification.*; import static nl.wur.bif.pantools.pangenome.create_skip_arrays.create_skip_arrays; import static nl.wur.bif.pantools.utils.Globals.*; @@ -485,7 +486,8 @@ public class Phylogeny { */ public String[] concatenate_pre_alignment(String[] gene_names) { String new_gene_names = ""; // exclude the genes that cannot be used - initialize_BLOSUM_matrix(62); + + match = FileUtils.loadScoringMatrix("BLOSUM62"); System.out.println("\rGene name: Maximum bases trimmed from START/END of nucleotide sequences -> " + "Parsimony informative sites in trimmed nucleotide alignment"); @@ -2293,7 +2295,7 @@ public class Phylogeny { msa.alignSequences(true, false); //NB: I do not check for alignment_phylogeny_mode.contains("nucleotide") currently tho ArrayList<String> updatedMsaNames = msa.getMsaNames(); //get msaNames, since this list is updated with trimming results - initialize_BLOSUM_matrix(BLOSUM); + match = FileUtils.loadScoringMatrix("BLOSUM" + BLOSUM); HashMap<String, String> groupInfoMap = gatherInfoCorePhylogenyLog(hmNodeList, updatedMsaNames); GRAPH_DB.shutdown(); diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/ProteomeLayer.java b/src/main/java/nl/wur/bif/pantools/pangenome/ProteomeLayer.java index ac3a3722c..f7ff8fe06 100644 --- 
a/src/main/java/nl/wur/bif/pantools/pangenome/ProteomeLayer.java +++ b/src/main/java/nl/wur/bif/pantools/pangenome/ProteomeLayer.java @@ -413,8 +413,8 @@ public class ProteomeLayer { StringBuilder query; StringBuilder subject; LocalSequenceAlignment aligner; - public Find_similarities() { - aligner = new LocalSequenceAlignment(GAP_OPEN, GAP_EXT,MAX_ALIGNMENT_LENGTH, 0, 'P'); + public Find_similarities(String scoringMatrix) { + aligner = new LocalSequenceAlignment(GAP_OPEN, GAP_EXT,MAX_ALIGNMENT_LENGTH, 0, scoringMatrix); query = new StringBuilder(); subject = new StringBuilder(); } @@ -1080,7 +1080,7 @@ public class ProteomeLayer { D7 - similarity: 35%, mcl_inflation: 3.6, intersection: 0.02, contrast: 2 D8 - similarity: 25%, mcl_inflation: 2.4, intersection: 0.01, contrast: 1 */ - public void optimal_grouping() { + public void optimal_grouping(String scoringMatrix) { if (Mode.contains("FAST")) { FAST = true; // check if the F1 score dropped compared to the previous grouping, stop when this is the case } @@ -1109,7 +1109,7 @@ public class ProteomeLayer { } System.out.println("Grouping " + (i+1) + ", similarity = " + MIN_NORMALIZED_SIMILARITY); first_generate_proteins = true; - group(); + group(scoringMatrix); /*append_string_to_file_full_path(date + "\npantools optimal_grouping\nRuntime after grouping relaxation: " + (i+1) + "\n", WORKING_DIRECTORY + "/log/used_commands"); print_runtime(false, true);*/ @@ -2793,7 +2793,7 @@ public class ProteomeLayer { /** * Groups similar proteins into homology groups. 
*/ - public void group() { + public void group(String scoringMatrix) { MAX_TRANSACTION_SIZE = 1000; progress = ""; System.out.println("\nClustering protein sequences\n"); @@ -2891,7 +2891,7 @@ public class ProteomeLayer { es.execute(new Generate_proteins()); es.execute(new Find_intersections(num_proteins)); for(int i = 1; i <= THREADS - 2; i++) { - es.execute(new Find_similarities()); + es.execute(new Find_similarities(scoringMatrix)); } es.execute(new Write_similarities()); es.shutdown(); diff --git a/src/main/java/nl/wur/bif/pantools/utils/FileUtils.java b/src/main/java/nl/wur/bif/pantools/utils/FileUtils.java index 022829af8..793b6f2f5 100644 --- a/src/main/java/nl/wur/bif/pantools/utils/FileUtils.java +++ b/src/main/java/nl/wur/bif/pantools/utils/FileUtils.java @@ -9,14 +9,50 @@ import org.apache.commons.io.IOUtils; import java.io.*; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Scanner; import java.util.zip.GZIPInputStream; -/** - * Class for pantools file utility functions. - * - * @author Roel van Esch, Bioinformatics group, Wageningen University, the Netherlands - */ +import static org.apache.logging.log4j.core.util.Loader.getClassLoader; + public class FileUtils { + /** + * Load a matrix file from the project resources. 
+ * @param matrixName name of the scoring matrix file + * @return scoring matrix that can be indexed by char values + */ + public static int[][] loadScoringMatrix(String matrixName) { + // initialize empty matrix and column headers + final int[][] scoringMatrix = new int[256][256]; + char[] headers = null; + + // load matrix file from project resources + final String resourceLocation = Paths.get("scoring-matrices").resolve(matrixName).toString(); + final InputStream inputStream = getClassLoader().getResourceAsStream(resourceLocation); + assert inputStream != null; + final Scanner matrixFile = new Scanner(inputStream); + + // scan the matrix + while (matrixFile.hasNextLine()) { + final String line = matrixFile.nextLine(); + if (line.length() == 0 || line.startsWith("#")) continue; + if (headers == null) { + // set header char values + headers = line.replaceAll("\\s+", "").toCharArray(); + continue; + } + // add values to matrix using char col/row headers as indices + final String[] values = line.split("\\s+"); + final char a = values[0].charAt(0); + for (int i = 1; i < values.length; i++) { + final char b = headers[i - 1]; + scoringMatrix[a][b] = Integer.parseInt(values[i]); + } + } + matrixFile.close(); + return scoringMatrix; + } + /** * Add a shutdown hook to a temporary directory to remove it on system exit. 
* @param directory path of temporary directory diff --git a/src/main/java/nl/wur/bif/pantools/utils/Globals.java b/src/main/java/nl/wur/bif/pantools/utils/Globals.java index ac07d4332..5a537880c 100644 --- a/src/main/java/nl/wur/bif/pantools/utils/Globals.java +++ b/src/main/java/nl/wur/bif/pantools/utils/Globals.java @@ -175,7 +175,7 @@ public class Globals { public static boolean ALLOW_POLYTOMIES = false; public static boolean TRIMMING = true; public static String msaMethod = "per_group"; - public static int BLOSUM = 62; + public static int BLOSUM; public static HashMap<String, Integer> phenotype_threshold_map; public static HashMap<Integer, String> geno_pheno_map; // genome number is coupled to its phenotype diff --git a/src/main/resources/Defaults.properties b/src/main/resources/Defaults.properties index 57343d848..fa34afbe2 100644 --- a/src/main/resources/Defaults.properties +++ b/src/main/resources/Defaults.properties @@ -4,6 +4,8 @@ threads = 0 core-threshold = 100 phenotype-threshold = 100 unique-threshold = 0 +scoring-matrix = BLOSUM62 +blosum = 62 # subcommands # add_phenotypes diff --git a/src/main/resources/ErrorMessages.properties b/src/main/resources/ErrorMessages.properties index f7957befc..188f50f0b 100644 --- a/src/main/resources/ErrorMessages.properties +++ b/src/main/resources/ErrorMessages.properties @@ -26,6 +26,8 @@ file.short-read = Short-read file(s) not found file.tree = Phylogenetic tree file not found (${validatedValue}) match.blosum = --blosum should be 45, 62 or 80 +no-protein = Protein scoring matrices are not allowed for this command, choose a nucleotide matrix like NUC.4.4 +no-nuc = Nucleotide scoring matrices are not allowed for this command, choose a protein matrix like BLOSUM62 max.alignment-band = --alignment-band must be less than or equal to {value} max.contrast = --contrast must be less than or equal to {value} @@ -94,6 +96,9 @@ positive.threads = --threads must be greater than 0 range.clipping-stringency = 
'--clipping-stringency' must be in range [0..3] range.ir = --intersection-rate must be in range [0.001,0.1] +scoring_matrix = Invalid scoring matrix, choose between BLOSUM45, BLOSUM50, BLOSUM62, BLOSUM80, BLOSUM90, \ + PAM30, PAM70 and PAM250. + size.empty.exclude = --exclude should consist of comma separated integers or ranges for genomes \ (example: --exclude 1,2,5-10) size.empty.include = --include should consist of comma separated integers or ranges for genomes \ diff --git a/src/main/resources/MessageBundle.properties b/src/main/resources/MessageBundle.properties index 1191a8ee3..5b55fa997 100644 --- a/src/main/resources/MessageBundle.properties +++ b/src/main/resources/MessageBundle.properties @@ -151,6 +151,8 @@ pantools.export_pangenome.usage.description = Export a pangenome built with buil # Options annotations-file = A text file with the identifiers of annotations to be included. +blosum = A BLOSUM matrix to be used for the calculation of protein similarity. Allowed values are 45, 50, 62, 80 and 90 \ + (default: ${DEFAULT-VALUE}). core-threshold = Threshold (%%) for (soft) core genes. Default is ${DEFAULT-VALUE}%% of genomes. database-path = Path to the database root directory. exclude = Exclude a selection of genomes. @@ -175,6 +177,7 @@ regions-file = A text file containing genome locations with on each line: a geno end position, separated by a space. threads = Number of parallel working threads, default is the number of cores or 8, whichever is lower. unique-threshold = Threshold (%%) for unique/cloud genes. Default is a single genome, not a percentage. +scoring-matrix = The scoring matrix used (default: ${DEFAULT-VALUE}). # Subcommand options # AddAnnotations @@ -328,8 +331,6 @@ pantools.mlsa_concatenate.threads = Number of threads for MAFFT, \ pantools.mlsa_find_genes.include = Only search for genes in a selection of genomes. pantools.mlsa_find_genes.exclude = Do not search for genes in this selection of genomes.
# MSA -pantools.msa.blosum = A BLOSUM matrix to be used for the calculation of protein similarity. Allowed values are 45, 62 \ - and 80 (default: ${DEFAULT-VALUE}). pantools.msa.no-fasttree = Run FastTree (default: ${DEFAULT-VALUE}). pantools.msa.functions = For specifying one or multiple functional domains \ (Only used when @|fg(yellow) --method=functions|@). diff --git a/src/main/resources/scoring-matrices/BLOSUM45 b/src/main/resources/scoring-matrices/BLOSUM45 new file mode 100644 index 000000000..80ba30f56 --- /dev/null +++ b/src/main/resources/scoring-matrices/BLOSUM45 @@ -0,0 +1,27 @@ +# Entries for the BLOSUM45 matrix at a scale of ln(2)/3.0. + A R N D C Q E G H I L K M F P S T W Y V B J Z X * +A 5 -2 -1 -2 -1 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -2 -2 0 -1 -1 -1 -1 -5 +R -2 7 0 -1 -3 1 0 -2 0 -3 -2 3 -1 -2 -2 -1 -1 -2 -1 -2 -1 -3 1 -1 -5 +N -1 0 6 2 -2 0 0 0 1 -2 -3 0 -2 -2 -2 1 0 -4 -2 -3 5 -3 0 -1 -5 +D -2 -1 2 7 -3 0 2 -1 0 -4 -3 0 -3 -4 -1 0 -1 -4 -2 -3 6 -3 1 -1 -5 +C -1 -3 -2 -3 12 -3 -3 -3 -3 -3 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -2 -2 -3 -1 -5 +Q -1 1 0 0 -3 6 2 -2 1 -2 -2 1 0 -4 -1 0 -1 -2 -1 -3 0 -2 4 -1 -5 +E -1 0 0 2 -3 2 6 -2 0 -3 -2 1 -2 -3 0 0 -1 -3 -2 -3 1 -3 5 -1 -5 +G 0 -2 0 -1 -3 -2 -2 7 -2 -4 -3 -2 -2 -3 -2 0 -2 -2 -3 -3 -1 -4 -2 -1 -5 +H -2 0 1 0 -3 1 0 -2 10 -3 -2 -1 0 -2 -2 -1 -2 -3 2 -3 0 -2 0 -1 -5 +I -1 -3 -2 -4 -3 -2 -3 -4 -3 5 2 -3 2 0 -2 -2 -1 -2 0 3 -3 4 -3 -1 -5 +L -1 -2 -3 -3 -2 -2 -2 -3 -2 2 5 -3 2 1 -3 -3 -1 -2 0 1 -3 4 -2 -1 -5 +K -1 3 0 0 -3 1 1 -2 -1 -3 -3 5 -1 -3 -1 -1 -1 -2 -1 -2 0 -3 1 -1 -5 +M -1 -1 -2 -3 -2 0 -2 -2 0 2 2 -1 6 0 -2 -2 -1 -2 0 1 -2 2 -1 -1 -5 +F -2 -2 -2 -4 -2 -4 -3 -3 -2 0 1 -3 0 8 -3 -2 -1 1 3 0 -3 1 -3 -1 -5 +P -1 -2 -2 -1 -4 -1 0 -2 -2 -2 -3 -1 -2 -3 9 -1 -1 -3 -3 -3 -2 -3 -1 -1 -5 +S 1 -1 1 0 -1 0 0 0 -1 -2 -3 -1 -2 -2 -1 4 2 -4 -2 -1 0 -2 0 -1 -5 +T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -1 -1 2 5 -3 -1 0 0 -1 -1 -1 -5 +W -2 -2 -4 -4 -5 -2 -3 -2 -3 -2 -2 -2 -2 1 -3 -4 -3 15 3 -3 -4 -2 -2 -1 -5 +Y -2 -1 -2 
-2 -3 -1 -2 -3 2 0 0 -1 0 3 -3 -2 -1 3 8 -1 -2 0 -2 -1 -5 +V 0 -2 -3 -3 -1 -3 -3 -3 -3 3 1 -2 1 0 -3 -1 0 -3 -1 5 -3 2 -3 -1 -5 +B -1 -1 5 6 -2 0 1 -1 0 -3 -3 0 -2 -3 -2 0 0 -4 -2 -3 5 -3 1 -1 -5 +J -1 -3 -3 -3 -2 -2 -3 -4 -2 4 4 -3 2 1 -3 -2 -1 -2 0 2 -3 4 -2 -1 -5 +Z -1 1 0 1 -3 4 5 -2 0 -3 -2 1 -1 -3 -1 0 -1 -2 -2 -3 1 -2 5 -1 -5 +X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -5 +* -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 1 \ No newline at end of file diff --git a/src/main/resources/scoring-matrices/BLOSUM50 b/src/main/resources/scoring-matrices/BLOSUM50 new file mode 100644 index 000000000..fce0f81b5 --- /dev/null +++ b/src/main/resources/scoring-matrices/BLOSUM50 @@ -0,0 +1,27 @@ +# Entries for the BLOSUM50 matrix at a scale of ln(2)/3.0. + A R N D C Q E G H I L K M F P S T W Y V B J Z X * +A 5 -2 -1 -2 -1 -1 -1 0 -2 -1 -2 -1 -1 -3 -1 1 0 -3 -2 0 -2 -2 -1 -1 -5 +R -2 7 -1 -2 -4 1 0 -3 0 -4 -3 3 -2 -3 -3 -1 -1 -3 -1 -3 -1 -3 0 -1 -5 +N -1 -1 7 2 -2 0 0 0 1 -3 -4 0 -2 -4 -2 1 0 -4 -2 -3 5 -4 0 -1 -5 +D -2 -2 2 8 -4 0 2 -1 -1 -4 -4 -1 -4 -5 -1 0 -1 -5 -3 -4 6 -4 1 -1 -5 +C -1 -4 -2 -4 13 -3 -3 -3 -3 -2 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -3 -2 -3 -1 -5 +Q -1 1 0 0 -3 7 2 -2 1 -3 -2 2 0 -4 -1 0 -1 -1 -1 -3 0 -3 4 -1 -5 +E -1 0 0 2 -3 2 6 -3 0 -4 -3 1 -2 -3 -1 -1 -1 -3 -2 -3 1 -3 5 -1 -5 +G 0 -3 0 -1 -3 -2 -3 8 -2 -4 -4 -2 -3 -4 -2 0 -2 -3 -3 -4 -1 -4 -2 -1 -5 +H -2 0 1 -1 -3 1 0 -2 10 -4 -3 0 -1 -1 -2 -1 -2 -3 2 -4 0 -3 0 -1 -5 +I -1 -4 -3 -4 -2 -3 -4 -4 -4 5 2 -3 2 0 -3 -3 -1 -3 -1 4 -4 4 -3 -1 -5 +L -2 -3 -4 -4 -2 -2 -3 -4 -3 2 5 -3 3 1 -4 -3 -1 -2 -1 1 -4 4 -3 -1 -5 +K -1 3 0 -1 -3 2 1 -2 0 -3 -3 6 -2 -4 -1 0 -1 -3 -2 -3 0 -3 1 -1 -5 +M -1 -2 -2 -4 -2 0 -2 -3 -1 2 3 -2 7 0 -3 -2 -1 -1 0 1 -3 2 -1 -1 -5 +F -3 -3 -4 -5 -2 -4 -3 -4 -1 0 1 -4 0 8 -4 -3 -2 1 4 -1 -4 1 -4 -1 -5 +P -1 -3 -2 -1 -4 -1 -1 -2 -2 -3 -4 -1 -3 -4 10 -1 -1 -4 -3 -3 -2 -3 -1 -1 -5 +S 1 -1 1 0 -1 0 -1 0 -1 -3 -3 0 -2 -3 -1 5 2 -4 -2 -2 0 -3 0 
-1 -5 +T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 2 5 -3 -2 0 0 -1 -1 -1 -5 +W -3 -3 -4 -5 -5 -1 -3 -3 -3 -3 -2 -3 -1 1 -4 -4 -3 15 2 -3 -5 -2 -2 -1 -5 +Y -2 -1 -2 -3 -3 -1 -2 -3 2 -1 -1 -2 0 4 -3 -2 -2 2 8 -1 -3 -1 -2 -1 -5 +V 0 -3 -3 -4 -1 -3 -3 -4 -4 4 1 -3 1 -1 -3 -2 0 -3 -1 5 -3 2 -3 -1 -5 +B -2 -1 5 6 -3 0 1 -1 0 -4 -4 0 -3 -4 -2 0 0 -5 -3 -3 6 -4 1 -1 -5 +J -2 -3 -4 -4 -2 -3 -3 -4 -3 4 4 -3 2 1 -3 -3 -1 -2 -1 2 -4 4 -3 -1 -5 +Z -1 0 0 1 -3 4 5 -2 0 -3 -3 1 -1 -4 -1 0 -1 -2 -2 -3 1 -3 5 -1 -5 +X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -5 +* -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 1 \ No newline at end of file diff --git a/src/main/resources/scoring-matrices/BLOSUM62 b/src/main/resources/scoring-matrices/BLOSUM62 new file mode 100644 index 000000000..0d184f05c --- /dev/null +++ b/src/main/resources/scoring-matrices/BLOSUM62 @@ -0,0 +1,27 @@ +# Entries for the BLOSUM62 matrix at a scale of ln(2)/2.0. + A R N D C Q E G H I L K M F P S T W Y V B J Z X * +A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 -1 -1 -4 +R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 -2 0 -1 -4 +N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 4 -3 0 -1 -4 +D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 -3 1 -1 -4 +C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -1 -3 -1 -4 +Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 -2 4 -1 -4 +E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 -3 4 -1 -4 +G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -4 -2 -1 -4 +H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 -3 0 -1 -4 +I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 3 -3 -1 -4 +L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 3 -3 -1 -4 +K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 -3 1 -1 -4 +M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 2 -1 -1 -4 +F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 
-2 -2 1 3 -1 -3 0 -3 -1 -4 +P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -3 -1 -1 -4 +S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 -2 0 -1 -4 +T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 -1 -1 -4 +W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -2 -2 -1 -4 +Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -1 -2 -1 -4 +V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 2 -2 -1 -4 +B -2 -1 4 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 -3 0 -1 -4 +J -1 -2 -3 -3 -1 -2 -3 -4 -3 3 3 -3 2 0 -3 -2 -1 -2 -1 2 -3 3 -3 -1 -4 +Z -1 0 0 1 -3 4 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -2 -2 -2 0 -3 4 -1 -4 +X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -4 +* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 \ No newline at end of file diff --git a/src/main/resources/scoring-matrices/BLOSUM80 b/src/main/resources/scoring-matrices/BLOSUM80 new file mode 100644 index 000000000..970f9287f --- /dev/null +++ b/src/main/resources/scoring-matrices/BLOSUM80 @@ -0,0 +1,27 @@ +# Entries for the BLOSUM80 matrix at a scale of ln(2)/2.0. 
+ A R N D C Q E G H I L K M F P S T W Y V B J Z X * +A 5 -2 -2 -2 -1 -1 -1 0 -2 -2 -2 -1 -1 -3 -1 1 0 -3 -2 0 -2 -2 -1 -1 -6 +R -2 6 -1 -2 -4 1 -1 -3 0 -3 -3 2 -2 -4 -2 -1 -1 -4 -3 -3 -1 -3 0 -1 -6 +N -2 -1 6 1 -3 0 -1 -1 0 -4 -4 0 -3 -4 -3 0 0 -4 -3 -4 5 -4 0 -1 -6 +D -2 -2 1 6 -4 -1 1 -2 -2 -4 -5 -1 -4 -4 -2 -1 -1 -6 -4 -4 5 -5 1 -1 -6 +C -1 -4 -3 -4 9 -4 -5 -4 -4 -2 -2 -4 -2 -3 -4 -2 -1 -3 -3 -1 -4 -2 -4 -1 -6 +Q -1 1 0 -1 -4 6 2 -2 1 -3 -3 1 0 -4 -2 0 -1 -3 -2 -3 0 -3 4 -1 -6 +E -1 -1 -1 1 -5 2 6 -3 0 -4 -4 1 -2 -4 -2 0 -1 -4 -3 -3 1 -4 5 -1 -6 +G 0 -3 -1 -2 -4 -2 -3 6 -3 -5 -4 -2 -4 -4 -3 -1 -2 -4 -4 -4 -1 -5 -3 -1 -6 +H -2 0 0 -2 -4 1 0 -3 8 -4 -3 -1 -2 -2 -3 -1 -2 -3 2 -4 -1 -4 0 -1 -6 +I -2 -3 -4 -4 -2 -3 -4 -5 -4 5 1 -3 1 -1 -4 -3 -1 -3 -2 3 -4 3 -4 -1 -6 +L -2 -3 -4 -5 -2 -3 -4 -4 -3 1 4 -3 2 0 -3 -3 -2 -2 -2 1 -4 3 -3 -1 -6 +K -1 2 0 -1 -4 1 1 -2 -1 -3 -3 5 -2 -4 -1 -1 -1 -4 -3 -3 -1 -3 1 -1 -6 +M -1 -2 -3 -4 -2 0 -2 -4 -2 1 2 -2 6 0 -3 -2 -1 -2 -2 1 -3 2 -1 -1 -6 +F -3 -4 -4 -4 -3 -4 -4 -4 -2 -1 0 -4 0 6 -4 -3 -2 0 3 -1 -4 0 -4 -1 -6 +P -1 -2 -3 -2 -4 -2 -2 -3 -3 -4 -3 -1 -3 -4 8 -1 -2 -5 -4 -3 -2 -4 -2 -1 -6 +S 1 -1 0 -1 -2 0 0 -1 -1 -3 -3 -1 -2 -3 -1 5 1 -4 -2 -2 0 -3 0 -1 -6 +T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -2 -1 -1 -2 -2 1 5 -4 -2 0 -1 -1 -1 -1 -6 +W -3 -4 -4 -6 -3 -3 -4 -4 -3 -3 -2 -4 -2 0 -5 -4 -4 11 2 -3 -5 -3 -3 -1 -6 +Y -2 -3 -3 -4 -3 -2 -3 -4 2 -2 -2 -3 -2 3 -4 -2 -2 2 7 -2 -3 -2 -3 -1 -6 +V 0 -3 -4 -4 -1 -3 -3 -4 -4 3 1 -3 1 -1 -3 -2 0 -3 -2 4 -4 2 -3 -1 -6 +B -2 -1 5 5 -4 0 1 -1 -1 -4 -4 -1 -3 -4 -2 0 -1 -5 -3 -4 5 -4 0 -1 -6 +J -2 -3 -4 -5 -2 -3 -4 -5 -4 3 3 -3 2 0 -4 -3 -1 -3 -2 2 -4 3 -3 -1 -6 +Z -1 0 0 1 -4 4 5 -3 0 -4 -3 1 -1 -4 -2 0 -1 -3 -3 -3 0 -3 5 -1 -6 +X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -6 +* -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 1 \ No newline at end of file diff --git a/src/main/resources/scoring-matrices/BLOSUM90 
b/src/main/resources/scoring-matrices/BLOSUM90 new file mode 100644 index 000000000..066def786 --- /dev/null +++ b/src/main/resources/scoring-matrices/BLOSUM90 @@ -0,0 +1,27 @@ +# Entries for the BLOSUM90 matrix at a scale of ln(2)/2.0. + A R N D C Q E G H I L K M F P S T W Y V B J Z X * +A 5 -2 -2 -3 -1 -1 -1 0 -2 -2 -2 -1 -2 -3 -1 1 0 -4 -3 -1 -2 -2 -1 -1 -6 +R -2 6 -1 -3 -5 1 -1 -3 0 -4 -3 2 -2 -4 -3 -1 -2 -4 -3 -3 -2 -3 0 -1 -6 +N -2 -1 7 1 -4 0 -1 -1 0 -4 -4 0 -3 -4 -3 0 0 -5 -3 -4 5 -4 -1 -1 -6 +D -3 -3 1 7 -5 -1 1 -2 -2 -5 -5 -1 -4 -5 -3 -1 -2 -6 -4 -5 5 -5 1 -1 -6 +C -1 -5 -4 -5 9 -4 -6 -4 -5 -2 -2 -4 -2 -3 -4 -2 -2 -4 -4 -2 -4 -2 -5 -1 -6 +Q -1 1 0 -1 -4 7 2 -3 1 -4 -3 1 0 -4 -2 -1 -1 -3 -3 -3 -1 -3 5 -1 -6 +E -1 -1 -1 1 -6 2 6 -3 -1 -4 -4 0 -3 -5 -2 -1 -1 -5 -4 -3 1 -4 5 -1 -6 +G 0 -3 -1 -2 -4 -3 -3 6 -3 -5 -5 -2 -4 -5 -3 -1 -3 -4 -5 -5 -2 -5 -3 -1 -6 +H -2 0 0 -2 -5 1 -1 -3 8 -4 -4 -1 -3 -2 -3 -2 -2 -3 1 -4 -1 -4 0 -1 -6 +I -2 -4 -4 -5 -2 -4 -4 -5 -4 5 1 -4 1 -1 -4 -3 -1 -4 -2 3 -5 3 -4 -1 -6 +L -2 -3 -4 -5 -2 -3 -4 -5 -4 1 5 -3 2 0 -4 -3 -2 -3 -2 0 -5 4 -4 -1 -6 +K -1 2 0 -1 -4 1 0 -2 -1 -4 -3 6 -2 -4 -2 -1 -1 -5 -3 -3 -1 -3 1 -1 -6 +M -2 -2 -3 -4 -2 0 -3 -4 -3 1 2 -2 7 -1 -3 -2 -1 -2 -2 0 -4 2 -2 -1 -6 +F -3 -4 -4 -5 -3 -4 -5 -5 -2 -1 0 -4 -1 7 -4 -3 -3 0 3 -2 -4 0 -4 -1 -6 +P -1 -3 -3 -3 -4 -2 -2 -3 -3 -4 -4 -2 -3 -4 8 -2 -2 -5 -4 -3 -3 -4 -2 -1 -6 +S 1 -1 0 -1 -2 -1 -1 -1 -2 -3 -3 -1 -2 -3 -2 5 1 -4 -3 -2 0 -3 -1 -1 -6 +T 0 -2 0 -2 -2 -1 -1 -3 -2 -1 -2 -1 -1 -3 -2 1 6 -4 -2 -1 -1 -2 -1 -1 -6 +W -4 -4 -5 -6 -4 -3 -5 -4 -3 -4 -3 -5 -2 0 -5 -4 -4 11 2 -3 -6 -3 -4 -1 -6 +Y -3 -3 -3 -4 -4 -3 -4 -5 1 -2 -2 -3 -2 3 -4 -3 -2 2 8 -3 -4 -2 -3 -1 -6 +V -1 -3 -4 -5 -2 -3 -3 -5 -4 3 0 -3 0 -2 -3 -2 -1 -3 -3 5 -4 1 -3 -1 -6 +B -2 -2 5 5 -4 -1 1 -2 -1 -5 -5 -1 -4 -4 -3 0 -1 -6 -4 -4 5 -5 0 -1 -6 +J -2 -3 -4 -5 -2 -3 -4 -5 -4 3 4 -3 2 0 -4 -3 -2 -3 -2 1 -5 4 -4 -1 -6 +Z -1 0 -1 1 -5 5 5 -3 0 -4 -4 1 -2 -4 -2 -1 -1 -4 -3 -3 0 -4 5 -1 -6 +X -1 -1 -1 -1 -1 -1 -1 -1 -1 
-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -6 +* -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 1 \ No newline at end of file diff --git a/src/main/resources/scoring-matrices/NUC.4.4 b/src/main/resources/scoring-matrices/NUC.4.4 new file mode 100644 index 000000000..c7ba6c68b --- /dev/null +++ b/src/main/resources/scoring-matrices/NUC.4.4 @@ -0,0 +1,24 @@ +# +# This matrix was created by Todd Lowe 12/10/92 +# +# Uses ambiguous nucleotide codes, probabilities rounded to +# nearest integer +# +# Lowest score = -4, Highest score = 5 +# + A T G C S W R Y K M B V H D N +A 5 -4 -4 -4 -4 1 1 -4 -4 1 -4 -1 -1 -1 -2 +T -4 5 -4 -4 -4 1 -4 1 1 -4 -1 -4 -1 -1 -2 +G -4 -4 5 -4 1 -4 1 -4 1 -4 -1 -1 -4 -1 -2 +C -4 -4 -4 5 1 -4 -4 1 -4 1 -1 -1 -1 -4 -2 +S -4 -4 1 1 -1 -4 -2 -2 -2 -2 -1 -1 -3 -3 -1 +W 1 1 -4 -4 -4 -1 -2 -2 -2 -2 -3 -3 -1 -1 -1 +R 1 -4 1 -4 -2 -2 -1 -4 -2 -2 -3 -1 -3 -1 -1 +Y -4 1 -4 1 -2 -2 -4 -1 -2 -2 -1 -3 -1 -3 -1 +K -4 1 1 -4 -2 -2 -2 -2 -1 -4 -1 -3 -3 -1 -1 +M 1 -4 -4 1 -2 -2 -2 -2 -4 -1 -3 -1 -1 -3 -1 +B -4 -1 -1 -1 -1 -3 -3 -1 -1 -3 -1 -2 -2 -2 -1 +V -1 -4 -1 -1 -1 -3 -1 -3 -3 -1 -2 -1 -2 -2 -1 +H -1 -1 -4 -1 -3 -1 -3 -1 -3 -1 -2 -2 -1 -2 -1 +D -1 -1 -1 -4 -3 -1 -1 -3 -1 -3 -2 -2 -2 -1 -1 +N -2 -2 -2 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 \ No newline at end of file diff --git a/src/main/resources/scoring-matrices/PAM250 b/src/main/resources/scoring-matrices/PAM250 new file mode 100644 index 000000000..9644ee494 --- /dev/null +++ b/src/main/resources/scoring-matrices/PAM250 @@ -0,0 +1,27 @@ +# Entries for the PAM250 matrix at a scale of ln(2)/3.0. 
+ A R N D C Q E G H I L K M F P S T W Y V B J Z X * +A 2 -2 0 0 -2 0 0 1 -1 -1 -2 -1 -1 -3 1 1 1 -6 -3 0 0 -1 0 -1 -8 +R -2 6 0 -1 -4 1 -1 -3 2 -2 -3 3 0 -4 0 0 -1 2 -4 -2 -1 -3 0 -1 -8 +N 0 0 2 2 -4 1 1 0 2 -2 -3 1 -2 -3 0 1 0 -4 -2 -2 2 -3 1 -1 -8 +D 0 -1 2 4 -5 2 3 1 1 -2 -4 0 -3 -6 -1 0 0 -7 -4 -2 3 -3 3 -1 -8 +C -2 -4 -4 -5 12 -5 -5 -3 -3 -2 -6 -5 -5 -4 -3 0 -2 -8 0 -2 -4 -5 -5 -1 -8 +Q 0 1 1 2 -5 4 2 -1 3 -2 -2 1 -1 -5 0 -1 -1 -5 -4 -2 1 -2 3 -1 -8 +E 0 -1 1 3 -5 2 4 0 1 -2 -3 0 -2 -5 -1 0 0 -7 -4 -2 3 -3 3 -1 -8 +G 1 -3 0 1 -3 -1 0 5 -2 -3 -4 -2 -3 -5 0 1 0 -7 -5 -1 0 -4 0 -1 -8 +H -1 2 2 1 -3 3 1 -2 6 -2 -2 0 -2 -2 0 -1 -1 -3 0 -2 1 -2 2 -1 -8 +I -1 -2 -2 -2 -2 -2 -2 -3 -2 5 2 -2 2 1 -2 -1 0 -5 -1 4 -2 3 -2 -1 -8 +L -2 -3 -3 -4 -6 -2 -3 -4 -2 2 6 -3 4 2 -3 -3 -2 -2 -1 2 -3 5 -3 -1 -8 +K -1 3 1 0 -5 1 0 -2 0 -2 -3 5 0 -5 -1 0 0 -3 -4 -2 1 -3 0 -1 -8 +M -1 0 -2 -3 -5 -1 -2 -3 -2 2 4 0 6 0 -2 -2 -1 -4 -2 2 -2 3 -2 -1 -8 +F -3 -4 -3 -6 -4 -5 -5 -5 -2 1 2 -5 0 9 -5 -3 -3 0 7 -1 -4 2 -5 -1 -8 +P 1 0 0 -1 -3 0 -1 0 0 -2 -3 -1 -2 -5 6 1 0 -6 -5 -1 -1 -2 0 -1 -8 +S 1 0 1 0 0 -1 0 1 -1 -1 -3 0 -2 -3 1 2 1 -2 -3 -1 0 -2 0 -1 -8 +T 1 -1 0 0 -2 -1 0 0 -1 0 -2 0 -1 -3 0 1 3 -5 -3 0 0 -1 -1 -1 -8 +W -6 2 -4 -7 -8 -5 -7 -7 -3 -5 -2 -3 -4 0 -6 -2 -5 17 0 -6 -5 -3 -6 -1 -8 +Y -3 -4 -2 -4 0 -4 -4 -5 0 -1 -1 -4 -2 7 -5 -3 -3 0 10 -2 -3 -1 -4 -1 -8 +V 0 -2 -2 -2 -2 -2 -2 -1 -2 4 2 -2 2 -1 -1 -1 0 -6 -2 4 -2 2 -2 -1 -8 +B 0 -1 2 3 -4 1 3 0 1 -2 -3 1 -2 -4 -1 0 0 -5 -3 -2 3 -3 2 -1 -8 +J -1 -3 -3 -3 -5 -2 -3 -4 -2 3 5 -3 3 2 -2 -2 -1 -3 -1 2 -3 5 -2 -1 -8 +Z 0 0 1 3 -5 3 3 0 2 -2 -3 0 -2 -5 0 0 -1 -6 -4 -2 2 -2 3 -1 -8 +X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -8 +* -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 1 \ No newline at end of file diff --git a/src/main/resources/scoring-matrices/PAM30 b/src/main/resources/scoring-matrices/PAM30 new file mode 100644 index 000000000..55d8a6554 --- /dev/null +++ 
b/src/main/resources/scoring-matrices/PAM30 @@ -0,0 +1,27 @@ +# Entries for the PAM30 matrix at a scale of ln(2)/2.0. + A R N D C Q E G H I L K M F P S T W Y V B J Z X * +A 6 -7 -4 -3 -6 -4 -2 -2 -7 -5 -6 -7 -5 -8 -2 0 -1 -13 -8 -2 -3 -6 -3 -1 -17 +R -7 8 -6 -10 -8 -2 -9 -9 -2 -5 -8 0 -4 -9 -4 -3 -6 -2 -10 -8 -7 -7 -4 -1 -17 +N -4 -6 8 2 -11 -3 -2 -3 0 -5 -7 -1 -9 -9 -6 0 -2 -8 -4 -8 6 -6 -3 -1 -17 +D -3 -10 2 8 -14 -2 2 -3 -4 -7 -12 -4 -11 -15 -8 -4 -5 -15 -11 -8 6 -10 1 -1 -17 +C -6 -8 -11 -14 10 -14 -14 -9 -7 -6 -15 -14 -13 -13 -8 -3 -8 -15 -4 -6 -12 -9 -14 -1 -17 +Q -4 -2 -3 -2 -14 8 1 -7 1 -8 -5 -3 -4 -13 -3 -5 -5 -13 -12 -7 -3 -5 6 -1 -17 +E -2 -9 -2 2 -14 1 8 -4 -5 -5 -9 -4 -7 -14 -5 -4 -6 -17 -8 -6 1 -7 6 -1 -17 +G -2 -9 -3 -3 -9 -7 -4 6 -9 -11 -10 -7 -8 -9 -6 -2 -6 -15 -14 -5 -3 -10 -5 -1 -17 +H -7 -2 0 -4 -7 1 -5 -9 9 -9 -6 -6 -10 -6 -4 -6 -7 -7 -3 -6 -1 -7 -1 -1 -17 +I -5 -5 -5 -7 -6 -8 -5 -11 -9 8 -1 -6 -1 -2 -8 -7 -2 -14 -6 2 -6 5 -6 -1 -17 +L -6 -8 -7 -12 -15 -5 -9 -10 -6 -1 7 -8 1 -3 -7 -8 -7 -6 -7 -2 -9 6 -7 -1 -17 +K -7 0 -1 -4 -14 -3 -4 -7 -6 -6 -8 7 -2 -14 -6 -4 -3 -12 -9 -9 -2 -7 -4 -1 -17 +M -5 -4 -9 -11 -13 -4 -7 -8 -10 -1 1 -2 11 -4 -8 -5 -4 -13 -11 -1 -10 0 -5 -1 -17 +F -8 -9 -9 -15 -13 -13 -14 -9 -6 -2 -3 -14 -4 9 -10 -6 -9 -4 2 -8 -10 -2 -13 -1 -17 +P -2 -4 -6 -8 -8 -3 -5 -6 -4 -8 -7 -6 -8 -10 8 -2 -4 -14 -13 -6 -7 -7 -4 -1 -17 +S 0 -3 0 -4 -3 -5 -4 -2 -6 -7 -8 -4 -5 -6 -2 6 0 -5 -7 -6 -1 -8 -5 -1 -17 +T -1 -6 -2 -5 -8 -5 -6 -6 -7 -2 -7 -3 -4 -9 -4 0 7 -13 -6 -3 -3 -5 -6 -1 -17 +W -13 -2 -8 -15 -15 -13 -17 -15 -7 -14 -6 -12 -13 -4 -14 -5 -13 13 -5 -15 -10 -7 -14 -1 -17 +Y -8 -10 -4 -11 -4 -12 -8 -14 -3 -6 -7 -9 -11 2 -13 -7 -6 -5 10 -7 -6 -7 -9 -1 -17 +V -2 -8 -8 -8 -6 -7 -6 -5 -6 2 -2 -9 -1 -8 -6 -6 -3 -15 -7 7 -8 0 -6 -1 -17 +B -3 -7 6 6 -12 -3 1 -3 -1 -6 -9 -2 -10 -10 -7 -1 -3 -10 -6 -8 6 -8 0 -1 -17 +J -6 -7 -6 -10 -9 -5 -7 -10 -7 5 6 -7 0 -2 -7 -8 -5 -7 -7 0 -8 6 -6 -1 -17 +Z -3 -4 -3 1 -14 6 6 -5 -1 -6 -7 -4 -5 -13 -4 -5 -6 -14 -9 -6 
0 -6 6 -1 -17 +X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -17 +* -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 1 \ No newline at end of file diff --git a/src/main/resources/scoring-matrices/PAM70 b/src/main/resources/scoring-matrices/PAM70 new file mode 100644 index 000000000..d287d225a --- /dev/null +++ b/src/main/resources/scoring-matrices/PAM70 @@ -0,0 +1,27 @@ +# Entries for the PAM70 matrix at a scale of ln(2)/2.0. + A R N D C Q E G H I L K M F P S T W Y V B J Z X * +A 5 -4 -2 -1 -4 -2 -1 0 -4 -2 -4 -4 -3 -6 0 1 1 -9 -5 -1 -1 -3 -1 -1 -11 +R -4 8 -3 -6 -5 0 -5 -6 0 -3 -6 2 -2 -7 -2 -1 -4 0 -7 -5 -4 -5 -2 -1 -11 +N -2 -3 6 3 -7 -1 0 -1 1 -3 -5 0 -5 -6 -3 1 0 -6 -3 -5 5 -4 -1 -1 -11 +D -1 -6 3 6 -9 0 3 -1 -1 -5 -8 -2 -7 -10 -4 -1 -2 -10 -7 -5 5 -7 2 -1 -11 +C -4 -5 -7 -9 9 -9 -9 -6 -5 -4 -10 -9 -9 -8 -5 -1 -5 -11 -2 -4 -8 -7 -9 -1 -11 +Q -2 0 -1 0 -9 7 2 -4 2 -5 -3 -1 -2 -9 -1 -3 -3 -8 -8 -4 -1 -3 5 -1 -11 +E -1 -5 0 3 -9 2 6 -2 -2 -4 -6 -2 -4 -9 -3 -2 -3 -11 -6 -4 2 -5 5 -1 -11 +G 0 -6 -1 -1 -6 -4 -2 6 -6 -6 -7 -5 -6 -7 -3 0 -3 -10 -9 -3 -1 -7 -3 -1 -11 +H -4 0 1 -1 -5 2 -2 -6 8 -6 -4 -3 -6 -4 -2 -3 -4 -5 -1 -4 0 -4 1 -1 -11 +I -2 -3 -3 -5 -4 -5 -4 -6 -6 7 1 -4 1 0 -5 -4 -1 -9 -4 3 -4 4 -4 -1 -11 +L -4 -6 -5 -8 -10 -3 -6 -7 -4 1 6 -5 2 -1 -5 -6 -4 -4 -4 0 -6 5 -4 -1 -11 +K -4 2 0 -2 -9 -1 -2 -5 -3 -4 -5 6 0 -9 -4 -2 -1 -7 -7 -6 -1 -5 -2 -1 -11 +M -3 -2 -5 -7 -9 -2 -4 -6 -6 1 2 0 10 -2 -5 -3 -2 -8 -7 0 -6 2 -3 -1 -11 +F -6 -7 -6 -10 -8 -9 -9 -7 -4 0 -1 -9 -2 8 -7 -4 -6 -2 4 -5 -7 -1 -9 -1 -11 +P 0 -2 -3 -4 -5 -1 -3 -3 -2 -5 -5 -4 -5 -7 7 0 -2 -9 -9 -3 -4 -5 -2 -1 -11 +S 1 -1 1 -1 -1 -3 -2 0 -3 -4 -6 -2 -3 -4 0 5 2 -3 -5 -3 0 -5 -2 -1 -11 +T 1 -4 0 -2 -5 -3 -3 -3 -4 -1 -4 -1 -2 -6 -2 2 6 -8 -4 -1 -1 -3 -3 -1 -11 +W -9 0 -6 -10 -11 -8 -11 -10 -5 -9 -4 -7 -8 -2 -9 -3 -8 13 -3 -10 -7 -5 -10 -1 -11 +Y -5 -7 -3 -7 -2 -8 -6 -9 -1 -4 -4 -7 -7 4 -9 -5 -4 -3 9 -5 -4 -4 -7 -1 -11 +V -1 -5 -5 
-5 -4 -4 -4 -3 -4 3 0 -6 0 -5 -3 -3 -1 -10 -5 6 -5 1 -4 -1 -11 +B -1 -4 5 5 -8 -1 2 -1 0 -4 -6 -1 -6 -7 -4 0 -1 -7 -4 -5 5 -5 1 -1 -11 +J -3 -5 -4 -7 -7 -3 -5 -7 -4 4 5 -5 2 -1 -5 -5 -3 -5 -4 1 -5 5 -4 -1 -11 +Z -1 -2 -1 2 -9 5 5 -3 1 -4 -4 -2 -3 -9 -2 -2 -3 -10 -7 -4 1 -4 5 -1 -11 +X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -11 +* -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 1 \ No newline at end of file diff --git a/src/test/java/nl/wur/bif/pantools/utils/LoadScoringMatrixTest.java b/src/test/java/nl/wur/bif/pantools/utils/LoadScoringMatrixTest.java new file mode 100644 index 000000000..8aa1ed44c --- /dev/null +++ b/src/test/java/nl/wur/bif/pantools/utils/LoadScoringMatrixTest.java @@ -0,0 +1,39 @@ +package nl.wur.bif.pantools.utils; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Sample tests for the FileUtils.loadScoringMatrix() functions for each of the available matrices. + */ +class LoadScoringMatrixTest { + @Test + void loadBlosum45Test() { + final int[][] blosum = FileUtils.loadScoringMatrix("BLOSUM45"); + assertAll( + () -> assertEquals(blosum['F']['G'], -3), + () -> assertEquals(blosum['W']['W'], 15), + () -> assertEquals(blosum['V']['Q'], -3) + ); + } + @Test + void loadBlosum62Test() { + final int[][] blosum = FileUtils.loadScoringMatrix("BLOSUM62"); + assertAll( + () -> assertEquals(blosum['F']['G'], -3), + () -> assertEquals(blosum['W']['W'], 11), + () -> assertEquals(blosum['V']['Q'], -2) + ); + } + + @Test + void LoadNucTest() { + final int[][] blosum = FileUtils.loadScoringMatrix("NUC.4.4"); + assertAll( + () -> assertEquals(blosum['G']['W'], -4), + () -> assertEquals(blosum['G']['W'], -4), + () -> assertEquals(blosum['G']['W'], -4) + ); + } +} \ No newline at end of file -- GitLab