From 72bbc6ac224d44abd33250d77db6a44602527eff Mon Sep 17 00:00:00 2001 From: Matthijs Moed <matthijs.moed@surf.nl> Date: Tue, 10 May 2022 15:26:14 +0200 Subject: [PATCH] Tiny documentation fix. --- .../bif/pantools/pangenome/GenomeLayer.java | 638 +++++++++--------- 1 file changed, 319 insertions(+), 319 deletions(-) diff --git a/src/main/java/nl/wur/bif/pantools/pangenome/GenomeLayer.java b/src/main/java/nl/wur/bif/pantools/pangenome/GenomeLayer.java index f09e62f62..d9c8040e4 100755 --- a/src/main/java/nl/wur/bif/pantools/pangenome/GenomeLayer.java +++ b/src/main/java/nl/wur/bif/pantools/pangenome/GenomeLayer.java @@ -114,7 +114,7 @@ import static nl.wur.bif.pantools.pantools.Pantools.total_genomes; /** * Implements all the functionalities related to the sequence layer of the pangenome - * + * * @author Siavash Sheikhizadeh, Eef Jonkheer, Bioinformatics group, Wageningen University, the Netherlands. */ public class GenomeLayer { @@ -129,7 +129,7 @@ public class GenomeLayer { private AtomicLong number_of_hits; private SequenceScanner genomeSc; public long highest_frequency = 0; - + /** * Implements a class for short sequencing reads */ @@ -138,26 +138,26 @@ public class GenomeLayer { StringBuilder forward_seq; StringBuilder reverse_seq; //StringBuilder quality; - + public read() { name = new StringBuilder(); forward_seq = new StringBuilder(); reverse_seq = new StringBuilder(); //quality = new StringBuilder(); } - + public void clear() { name.setLength(0); forward_seq.setLength(0); reverse_seq.setLength(0); //quality.setLength(0); } - + public int length() { return forward_seq.length(); } } - + /** * Implements a class for genomic hit of a single-layout read */ @@ -186,7 +186,7 @@ public class GenomeLayer { cigar = cg; reference = r; } - + public single_hit(single_hit h) { genome = h.genome; sequence = h.sequence; @@ -200,24 +200,24 @@ public class GenomeLayer { cigar = h.cigar; reference = h.reference; } - + public single_hit() { - + } - + @Override public String toString() { - return "(genome:" + genome + - ",sequence:" + sequence + - ",identity:" + identity + - ",score:" + score + - ",start:" + start + - ",offset:" + offset + - ",length:" + length + - ",deletions:" + deletions + - ",forward:" + forward + - ",reference:" + reference + + return "(genome:" + genome + + ",sequence:" + sequence + + ",identity:" + identity + + ",score:" + score + + ",start:" + start + + ",offset:" + offset + + ",length:" + length + + ",deletions:" + deletions + + ",forward:" + forward + + ",reference:" + reference + ",cigar:" + cigar +")"; } } @@ -277,28 +277,28 @@ public class GenomeLayer { @Override public String toString() { - return "(genome1:" + h1.genome + - ",sequence1:" + h1.sequence + - ",identity1:" + h1.identity + - ",score1:" + h1.score + - ",start1:" + h1.start + - ",offset1:" + h1.offset + - ",length1:" + h1.length + - ",deletions1:" + h1.deletions + - ",forward1:" + h1.forward + - ",reference1:" + h1.reference + + return "(genome1:" + h1.genome + + ",sequence1:" + h1.sequence + + ",identity1:" + h1.identity + + ",score1:" + h1.score + + ",start1:" + h1.start + + ",offset1:" + h1.offset + + ",length1:" + h1.length + + ",deletions1:" + h1.deletions + + ",forward1:" + h1.forward + + ",reference1:" + h1.reference + ",cigar1:" + h1.cigar +")" + "\n" + - "(genome2:" + h2.genome + - ",sequence2:" + h2.sequence + - ",identity2:" + h2.identity + - ",score2:" + h2.score + - ",start2:" + h2.start + - ",offset2:" + h2.offset + - ",length2:" + h2.length + - ",deletions2:" + h2.deletions + - ",forward2:" + h2.forward + - ",reference2:" + h2.reference + + "(genome2:" + h2.genome + + ",sequence2:" + h2.sequence + + ",identity2:" + h2.identity + + ",score2:" + h2.score + + ",start2:" + h2.start + + ",offset2:" + h2.offset + + ",length2:" + h2.length + + ",deletions2:" + h2.deletions + + ",forward2:" + h2.forward + + ",reference2:" + h2.reference + ",cigar2:" + h2.cigar +")"; } } @@ -309,22 +309,22 @@ public class GenomeLayer { public static class single_hitComparator implements Comparator<single_hit> { @Override public int compare(single_hit x, single_hit y) { - if (x.score > y.score) + if (x.score > y.score) return -1; - else if (x.score < y.score) + else if (x.score < y.score) return 1; - else if (x.sequence > y.sequence) + else if (x.sequence > y.sequence) return -1; - else if (x.sequence < y.sequence) + else if (x.sequence < y.sequence) return 1; - else if (x.start > y.start) + else if (x.start > y.start) return -1; - else if (x.start < y.start) + else if (x.start < y.start) return 1; - else + else return 0; } - } + } /** * Implements a comparator for paired-end genomic hits @@ -332,18 +332,18 @@ public class GenomeLayer { public static class paired_hitComparator implements Comparator<paired_hit> { @Override public int compare(paired_hit x, paired_hit y) { - if (x.get_score() > y.get_score()) + if (x.get_score() > y.get_score()) return -1; - else if (x.get_score() < y.get_score()) + else if (x.get_score() < y.get_score()) return 1; - else if (x.fragment_length < y.fragment_length) + else if (x.fragment_length < y.fragment_length) return -1; - else if (x.fragment_length > y.fragment_length) + else if (x.fragment_length > y.fragment_length) return 1; - else + else return 0; } - } + } /** * Implements a comparator for integer arrays of size 2 @@ -351,18 +351,18 @@ public class GenomeLayer { public static class IntPairComparator implements Comparator<int[]> { @Override public int compare(int[] x, int[] y) { - if (x[0] > y[0]) + if (x[0] > y[0]) return -1; - else if (x[0] < y[0]) + else if (x[0] < y[0]) return 1; - else if (x[1] < y[1]) + else if (x[1] < y[1]) return -1; - else if (x[1] > y[1]) + else if (x[1] > y[1]) return 1; else return 0; } - } + } /** * Implements a comparator for integers @@ -373,7 +373,7 @@ public class GenomeLayer { return v1 < v2 ? -1 : v1 > v2 ? 1 : 0; } } - + /** * Implements read mapping functionality */ @@ -421,7 +421,7 @@ public class GenomeLayer { //StringBuilder[] forward_read; StringBuilder[] reverse_read; //StringBuilder[] quality; - kmer current_kmer; + kmer current_kmer; //int[] read_len; String[] read_name; int num_hits = 0; @@ -457,7 +457,7 @@ public class GenomeLayer { thread_id = id; genome_numbers = gn; genome_numbers.sort(intcomp); - paired_end = paired; + paired_end = paired; num_segments = paired_end ? 2 : 1; alignment_result = new single_hit(); node_results = new LinkedList(); @@ -584,10 +584,10 @@ public class GenomeLayer { } tx.success(); } - + number_of_reads.getAndAdd(counter); number_of_hits.getAndAdd(num_hits); - number_of_alignments.getAndAdd(num_alns); + number_of_alignments.getAndAdd(num_alns); for (i = 0; i < genome_numbers.size(); ++i) { genome = genome_numbers.get(i); num_shared_mapping[genome].getAndAdd(shared[genome]); @@ -598,11 +598,11 @@ public class GenomeLayer { num_unmapped[adj_total_genomes+1].getAndAdd(unmapped[adj_total_genomes+1]); } } - + /** * Kmerizes the read and collects all the candidate locations read may map in each genome - * - * @param mate The number of segment in the read (can be 0 for single, 0/1 for paired-end) + * + * @param mate The number of segment in the read (can be 0 for single, 0/1 for paired-end) * @param read_string */ public void find_locations(int mate, String read_string) { @@ -658,11 +658,11 @@ public class GenomeLayer { } catch (NotFoundException|ClassCastException ex) { //num_exceptions++; //System.out.println(ex.getMessage()); - } + } //System.out.println(current_kmer.toString()); } } - + public boolean get_read() { synchronized(reader) { int file_nr = 0; @@ -675,23 +675,23 @@ public class GenomeLayer { } else { file_nr = 1; fastq_record[1] = reader[1].next(); - + } } return true; } else { return false; } - } catch (SAMException SAMexc) { // error given by fastqreader - System.out.println("\nError with sample " + (file_nr+1) + ". Read starting on line " + reader[file_nr].getLineNumber() +". Stopping now."); + } catch (SAMException SAMexc) { // error given by fastqreader + System.out.println("\nError with sample " + (file_nr+1) + ". Read starting on line " + reader[file_nr].getLineNumber() +". Stopping now."); return false; } catch (NoSuchElementException nse) { // file 1 is longer than file 2 - System.out.println("\rWARNING! File 1 has more reads as file 2!"); + System.out.println("\rWARNING! File 1 has more reads as file 2!"); return false; } } } - + /** * Removes the /1 or /2 or spaces from the end of the read ID * @param Id @@ -715,11 +715,11 @@ public class GenomeLayer { } return Id.substring(0, idx); - } - + } + /** * Takes the first k-mer of the read - * + * * @param read The read to be kmerized */ public void initialize_kmer(String read) { @@ -732,13 +732,13 @@ public class GenomeLayer { /** * Explores all the incoming edges to a node and collects candidate genomic locations read may map in each genome - * + * * @param node A node of gDBG - * @param mate The number of segment in the read (can be 0 for single, 0/1 for paired-end) + * @param mate The number of segment in the read (can be 0 for single, 0/1 for paired-end) * @param position Offset of the sampled kmer in the read - * @param read_len + * @param read_len */ - + public void explore_node(Node node, int mate, int position, int read_len) { final boolean is_canonical = current_kmer.get_canonical(); @@ -794,7 +794,7 @@ public class GenomeLayer { } /** - * Test whether a node with a given frequency is not highly-frequent. + * Test whether a node with a given frequency is highly-frequent. * @param frequency frequency of the node. * @return true if the node is considered highly-frequent, false if not. */ @@ -893,8 +893,8 @@ public class GenomeLayer { /** * Clusters all the candidate genomic locations based on their proximity and align the read to the candidate locations - * - * @param mate The number of segment in the read (can be 0 for single, 0/1 for paired-end) + * + * @param mate The number of segment in the read (can be 0 for single, 0/1 for paired-end) * @param genome The number of genome read is being mapped against * @param sholder The maximum distance between two neighboring candidate locations in clusters * @param read_string @@ -915,7 +915,7 @@ public class GenomeLayer { sequence = intpair[0]; start = intpair[1]; if (sequence == prev_sequence) { - if (start - prev_start > sholder) { + if (start - prev_start > sholder) { hit_counts.add(new int[]{count, prev_start}); count = 1; prev_start = start; @@ -947,11 +947,11 @@ public class GenomeLayer { locations[mate][genome].clear(); } } - + /** * Aligns the read to the candidate location of each cluster - * - * @param mate The number of segment in the read (can be 0 for single, 0/1 for paired-end) + * + * @param mate The number of segment in the read (can be 0 for single, 0/1 for paired-end) * @param genome The number of genome read is being mapped against * @param sequence The number of sequence read is being mapped against * @param ref_start The candidate location in the genome @@ -979,7 +979,7 @@ public class GenomeLayer { num_hits++; reference.setLength(0); genomeSc.get_sub_sequence(reference, genome, sequence, start, stop - start + 1, true); - if (alignments[mate].size() < 2 * ALIGNMENT_BOUND * read_len && + if (alignments[mate].size() < 2 * ALIGNMENT_BOUND * read_len && find_similar_subject(mate, genome, sequence, start, forward)) { if (valid_hit(read_len)) { hits[mate][genome].offer(new single_hit(alignment_result)); @@ -994,11 +994,11 @@ public class GenomeLayer { } } } - + /** - * Exhaustively aligns the read to a large region around the candidate location. Is used in very sensitive mode. - * - * @param mate The number of segment in the read (can be 0 for single, 0/1 for paired-end) + * Exhaustively aligns the read to a large region around the candidate location. Is used in very sensitive mode. + * + * @param mate The number of segment in the read (can be 0 for single, 0/1 for paired-end) * @param genome The number of genome read is being mapped against * @param sequence The number of sequence read is being mapped against * @param ref_start The candidate location in the genome @@ -1026,18 +1026,18 @@ public class GenomeLayer { /** * Calls the [banded] Smith-Waterman to align query (read) to the subject (genomic hit) - * + * * @param banded_alignment Determines if alignment is banded - * @param mate The number of segment in the read (can be 0 for single, 0/1 for paired-end) + * @param mate The number of segment in the read (can be 0 for single, 0/1 for paired-end) * @param genome The number of genome read is being mapped against * @param sequence The number of sequence read is being mapped against * @param start The candidate location in the genome * @param forward Determines if read should be mapped in forward or in reverse direction - * @param read_string + * @param read_string */ public void perform_alignment(boolean banded_alignment, int mate, int genome, int sequence, int start, boolean forward, String read_string) { if (banded_alignment) { - bounded_aligner.align(forward ? read_string : reverse_read[mate].toString(), reference.toString()); + bounded_aligner.align(forward ? read_string : reverse_read[mate].toString(), reference.toString()); alignment_result.genome = genome; alignment_result.sequence = sequence; alignment_result.cigar = bounded_aligner.get_cigar().toString(); @@ -1050,7 +1050,7 @@ public class GenomeLayer { alignment_result.forward = forward; alignment_result.reference = reference.toString(); } else { - aligner.align(forward ? read_string : reverse_read[mate].toString(), reference.toString()); + aligner.align(forward ? read_string : reverse_read[mate].toString(), reference.toString()); alignment_result.genome = genome; alignment_result.sequence = sequence; alignment_result.cigar = aligner.get_cigar().toString(); @@ -1067,12 +1067,12 @@ public class GenomeLayer { /** * Looks for a similar subject (reference) sequence in the list of previous alignments. - * - * @param mate The number of segment in the read (can be 0 for single, 0/1 for paired-end) + * + * @param mate The number of segment in the read (can be 0 for single, 0/1 for paired-end) * @param genome The number of genome read is being mapped against * @param sequence The number of sequence read is being mapped against * @param ref_start The candidate location in the genome - * @param fwd The direction of the alignment + * @param fwd The direction of the alignment * @return True if there is a similar subject in the list of previous alignments, or False. */ public boolean find_similar_subject(int mate, int genome, int sequence, int ref_start, boolean fwd) { @@ -1097,25 +1097,25 @@ public class GenomeLayer { } } return found; - } - + } + /** * Determines if an alignment is a valid hit. - * + * * @return True if the alignment is a valid hit, or False. */ boolean valid_hit(int read_len) { return (alignment_result.identity > MIN_IDENTITY && - alignment_result.length >= MIN_HIT_LENGTH && - alignment_result.start + alignment_result.offset >= 0 && - alignment_result.start + alignment_result.offset + - alignment_result.deletions + read_len + alignment_result.length >= MIN_HIT_LENGTH && + alignment_result.start + alignment_result.offset >= 0 && + alignment_result.start + alignment_result.offset + + alignment_result.deletions + read_len <= sequence_length[alignment_result.genome][alignment_result.sequence]); } - + /** * Determines if two strings are equal. - * + * * @param s1 The first string * @param s2 The second string * @return True if two strings are equal, or False @@ -1127,7 +1127,7 @@ public class GenomeLayer { are_equal = false; return are_equal; } - + /** * Collects and reports all the hits of the read in the genomes. */ @@ -1159,7 +1159,7 @@ public class GenomeLayer { if (paired_end) alignments[1].clear(); } - + /** * Tries to remap an unmapped read to remaining genomes, exhaustively. */ @@ -1229,7 +1229,7 @@ public class GenomeLayer { } } } - + /** * Calls the suitable method based on mapping mode. */ @@ -1246,9 +1246,9 @@ public class GenomeLayer { break; case 3: // all best hits report_all_hit(true); - } + } } - + /** * Collect all the hits of the current read in a genome * @param genome The genome for which hits are collected @@ -1331,7 +1331,7 @@ public class GenomeLayer { public void report_unique_hit() { if (!paired_end) { // single end reads single_hit h, best_hit; - h = single_hits.remove(); + h = single_hits.remove(); best_hit = new single_hit(h); if (h.start != -1) { if (!single_hits.isEmpty()) { @@ -1347,7 +1347,7 @@ public class GenomeLayer { unique[best_hit.genome]++; } } else { - if (ALIGNMENT_MODE >= 0) { + if (ALIGNMENT_MODE >= 0) { unmapped[best_hit.genome]++; } else { // when competitive mapping unmapped[adj_total_genomes +1]++; @@ -1356,7 +1356,7 @@ public class GenomeLayer { } } else { paired_hit h, best_hit; - h = paired_hits.remove(); + h = paired_hits.remove(); best_hit = new paired_hit(h.fragment_length, h.h1, h.h2); if (best_hit.get_max_start() != -1) { if (!paired_hits.isEmpty()) { @@ -1406,7 +1406,7 @@ public class GenomeLayer { } } } else { // unmapped - if (ALIGNMENT_MODE >= 0) { + if (ALIGNMENT_MODE >= 0) { unmapped[best_hit.h1.genome]++; unmapped[best_hit.h2.genome]++; } else { // when competitive mapping @@ -1418,7 +1418,7 @@ public class GenomeLayer { } /** - * Reports a random best-scored hit. Alignment mode -2 and 2 + * Reports a random best-scored hit. Alignment mode -2 and 2 */ public void report_one_hit() { if (!paired_end) { // single end reads @@ -1446,7 +1446,7 @@ public class GenomeLayer { unique[best_hit.genome]++; } else { shared[best_hit.genome]++; - } + } write_single_sam_record(best_hit, 0, 1.0 / count); } else { if (ALIGNMENT_MODE >= 0) { @@ -1455,7 +1455,7 @@ public class GenomeLayer { unmapped[adj_total_genomes +1]++; } write_single_sam_record(best_hit, 4, 0); - } + } } else { // paired paired_hit h, best_hit; int count; @@ -1470,7 +1470,7 @@ public class GenomeLayer { sum_freq += raw_abundance[h.h1.genome] / genome_sizes[h.h1.genome]; paired_hits_2.add(h); } - + rnd = rand.nextDouble(); while(!paired_hits_2.isEmpty()) { best_hit = paired_hits_2.remove(); @@ -1494,7 +1494,7 @@ public class GenomeLayer { unmapped[best_hit.h2.genome]++; if (count == 1) { unique[best_hit.h1.genome]++; - } else { + } else { shared[best_hit.h1.genome]++; } } else if (best_hit.h2.start != -1) { @@ -1502,25 +1502,25 @@ public class GenomeLayer { unmapped[best_hit.h1.genome]++; if (count == 1) { unique[best_hit.h2.genome]++; - } else { + } else { shared[best_hit.h2.genome]++; } } - } else { + } else { write_paired_sam_record(best_hit, 5, 5, 0, 0); - if (ALIGNMENT_MODE >= 0) { + if (ALIGNMENT_MODE >= 0) { unmapped[best_hit.h1.genome]++; unmapped[best_hit.h2.genome]++; } else { // when competitive mapping - unmapped[adj_total_genomes +1] += 2; + unmapped[adj_total_genomes +1] += 2; } } } } - + /** - * Reports all best-scored hits. Alignment mode -3, 0, 3 - * @param best + * Reports all best-scored hits. Alignment mode -3, 0, 3 + * @param best */ public void report_all_hit(boolean best) { if (!paired_end) { @@ -1532,7 +1532,7 @@ public class GenomeLayer { if (h.start == -1 || (best && h.score < best_hit.score)) { break; } - single_hits_2.add(h); + single_hits_2.add(h); } if (best_hit.start != -1) { prev_genome = -1; @@ -1553,7 +1553,7 @@ public class GenomeLayer { unmapped[adj_total_genomes +1]++; } write_single_sam_record(best_hit, 4, 0); - } + } } else { // paired paired_hit h, best_hit; int count, prev_genome; @@ -1597,7 +1597,7 @@ public class GenomeLayer { } prev_genome = best_hit.h1.genome; } - } else { + } else { write_paired_sam_record(best_hit, 5, 5, 0, 0); if (ALIGNMENT_MODE >= 0) { unmapped[best_hit.h1.genome]++; @@ -1605,10 +1605,10 @@ public class GenomeLayer { } else { // when competitive mapping unmapped[adj_total_genomes +1] += 2; } - } + } } } - + /** * Clears data structures of the collected hits */ @@ -1620,11 +1620,11 @@ public class GenomeLayer { paired_hits.clear(); paired_hits_2.clear(); } - } + } /** * Writes the SAM record for single-end mapping - * + * * @param h The hit to be reported * @param flag The SAM flag of the hit * @param p_value @@ -1656,17 +1656,17 @@ public class GenomeLayer { } } } - + /** * Writes the SAM record of a paired-end hit * @param h The hit to be reported * @param flag1 the initial SAM flag of read 1 * @param flag2 the initial SAM flag of read 2 * @param p_value1 - * @param p_value2 + * @param p_value2 */ public void write_paired_sam_record(paired_hit h, int flag1, int flag2, double p_value1, double p_value2) { - + if (sam_writers == null) { // when --out-format is 0 return; } @@ -1676,7 +1676,7 @@ public class GenomeLayer { sam_record2 = new SAMRecord(null); int position1 = h.h1.start + h.h1.offset + 1; int position2 = h.h2.start + h.h2.offset + 1; - + String chr_name_1 = ""; String chr_name_2 = ""; @@ -1684,15 +1684,15 @@ public class GenomeLayer { chr_name_1 = sequence_titles[h.h1.genome][h.h1.sequence].split("\\s")[0]; } if (h.h2.start!=-1) { - chr_name_2 = sequence_titles[h.h2.genome][h.h2.sequence].split("\\s")[0]; + chr_name_2 = sequence_titles[h.h2.genome][h.h2.sequence].split("\\s")[0]; } - flag1 |= 64; + flag1 |= 64; if (h.h2.start == -1) { flag1 |=8; } else { flag1 |= !h.h2.forward?32:0; // SEQ being reverse complemented } - + if (h.h1.start == -1) { sam_record1.setReadName(read_name[0]); sam_record1.setFlags(flag1); @@ -1713,13 +1713,13 @@ public class GenomeLayer { } } } - + flag1 |= h.h1.forward?0:16; // SEQ being reverse complemented - + //qname - sam_record1.setReadName(read_name[0]); + sam_record1.setReadName(read_name[0]); //flag - sam_record1.setFlags(flag1); + sam_record1.setFlags(flag1); //rname sam_record1.setReferenceName(sequence_titles[h.h1.genome][h.h1.sequence].split("\\s")[0]); //pos @@ -1756,17 +1756,17 @@ public class GenomeLayer { //qual sam_record1.setBaseQualityString(fastq_record[0].getBaseQualityString()); } - - + + // the second segment - flag2 |= 128; - + flag2 |= 128; + if (h.h1.start == -1) { flag2 |=8; } else { flag2 |= !h.h1.forward?32:0; // SEQ being reverse complemented } - + if (h.h2.start == -1) { // not mapped sam_record2.setReadName(read_name[1]); sam_record2.setFlags(flag2); @@ -1777,7 +1777,7 @@ public class GenomeLayer { sam_record2.setMateReferenceName(sequence_titles[h.h1.genome][h.h1.sequence].split("\\s")[0]); sam_record2.setMateAlignmentStart(position1); } - + } else { if (h.h1.start != -1 && h.h2.start != -1 && chr_name_1.equals(chr_name_2)) { if(h.h1.forward != h.h2.forward) { @@ -1786,12 +1786,12 @@ public class GenomeLayer { } } } - + flag2 |= h.h2.forward?0:16; // SEQ being reverse complemented //qname - sam_record2.setReadName(read_name[1]); + sam_record2.setReadName(read_name[1]); //flag - sam_record2.setFlags(flag2); + sam_record2.setFlags(flag2); //rname sam_record2.setReferenceName(sequence_titles[h.h2.genome][h.h2.sequence].split("\\s")[0]); //pos @@ -1804,7 +1804,7 @@ public class GenomeLayer { //rnext if (h.h1.start != -1 && h.h2.start != -1 && ! chr_name_1.equals(chr_name_2)) { sam_record2.setMateReferenceName(sequence_titles[h.h1.genome][h.h1.sequence].split("\\s")[0]); - } else { + } else { sam_record2.setMateReferenceName(sequence_titles[h.h2.genome][h.h2.sequence].split("\\s")[0]); } //pnext @@ -1836,14 +1836,14 @@ public class GenomeLayer { sam_writers[h.h1.genome].addAlignment(sam_record1); sam_writers[h.h2.genome].addAlignment(sam_record2); } - } + } } /** * Calculates length of the fragment of a paired-end hit - * @param h1 Hit of the first segment + * @param h1 Hit of the first segment * @param h2 Hit of the second segment - * @param read_len1 Length of the first read + * @param read_len1 Length of the first read * @param read_len2 Length of the first read * @return The proper length of the fragment, or Infinity if it is invalid. */ @@ -1857,14 +1857,14 @@ public class GenomeLayer { } else { frag_len = position1 + fastq_record[0].getReadLength() - position2; } - + if (frag_len < Math.max(read_len1, read_len2) || frag_len > MAX_FRAGMENT_LENGTH) { frag_len = Integer.MAX_VALUE; } return frag_len; } - } - + } + /** * Maps a single or paired-end library to the pangenome, generating a SAM/BAM * file for each genome. @@ -1888,9 +1888,9 @@ public class GenomeLayer { System.exit(1); } paired = PATH_TO_THE_SECOND_SRA != null; - if (INTERLEAVED) { + if (INTERLEAVED) { paired = true; // both read pairs are stored in the same file - } + } if (PATH_TO_THE_GENOME_NUMBERS_FILE == null && target_genome == null) { System.out.println("No genomes were selected to align to\n" + "Include either a file with --genome-number or provide the genome number with --reference\n"); @@ -1899,7 +1899,7 @@ public class GenomeLayer { System.out.println("Include ONLY genome number using a file with --genome-number OR provide the genome number with --reference\n"); System.exit(1); } - + FastqReader[] reader = new FastqReader[2]; reader[0] = new FastqReader(new File(PATH_TO_THE_FIRST_SRA), true); if (paired && !INTERLEAVED) { @@ -1919,47 +1919,47 @@ public class GenomeLayer { } tx.success(); } - + int extra = 0; if (ALIGNMENT_MODE < 0) { // when competitive mapping extra = 1; } num_shared_mapping = new AtomicLong[genomeDb.num_genomes + 1]; num_unique_mapping = new AtomicLong[genomeDb.num_genomes + 1]; - num_unmapped = new AtomicLong[genomeDb.num_genomes + 1 + extra]; + num_unmapped = new AtomicLong[genomeDb.num_genomes + 1 + extra]; number_of_reads = new AtomicLong(0); number_of_alignments = new AtomicLong(0); - number_of_hits = new AtomicLong(0); + number_of_hits = new AtomicLong(0); ArrayList<Integer>[] genome_numbers = retrieve_genomes_to_map_against(); adj_total_genomes = genome_numbers[0].size(); if (OUTFORMAT.equals("BAM") || OUTFORMAT.equals("SAM")) { sams = new SAMFileWriter[genomeDb.num_genomes + 1 + extra]; headers = new SAMFileHeader[genomeDb.num_genomes + 1 + extra]; - for (i = 0; i < genome_numbers[0].size(); ++i) { + for (i = 0; i < genome_numbers[0].size(); ++i) { genome = genome_numbers[0].get(i); headers[genome] = new SAMFileHeader(); for (j = 1; j <= genomeDb.num_sequences[genome]; ++j) { - headers[genome].addSequence(new SAMSequenceRecord(genomeDb.sequence_titles[genome][j].split("\\s")[0], + headers[genome].addSequence(new SAMSequenceRecord(genomeDb.sequence_titles[genome][j].split("\\s")[0], (int)genomeDb.sequence_length[genome][j])); } headers[genome].addProgramRecord(new SAMProgramRecord("PanTools")); if (OUTFORMAT.equals("BAM")) { - sams[genome] = new SAMFileWriterFactory().makeBAMWriter(headers[genome], false, + sams[genome] = new SAMFileWriterFactory().makeBAMWriter(headers[genome], false, new File(OUTPUT_PATH + "/pantools_" + genome + ".bam")); } else { // SAM - sams[genome] = new SAMFileWriterFactory().makeSAMWriter(headers[genome], false, + sams[genome] = new SAMFileWriterFactory().makeSAMWriter(headers[genome], false, new File(OUTPUT_PATH + "/pantools_" + genome + ".sam")); } } if (ALIGNMENT_MODE < 0) { // when competitive mapping headers[genome_numbers[0].size()+1] = new SAMFileHeader(); - sams[genome_numbers[0].size()+1] = new SAMFileWriterFactory().makeSAMWriter(headers[genome_numbers[0].size()+1], false, + sams[genome_numbers[0].size()+1] = new SAMFileWriterFactory().makeSAMWriter(headers[genome_numbers[0].size()+1], false, new File(OUTPUT_PATH + "/unmapped.sam")); } } else { // --output-format none sams = null; } - + if (!genome_numbers[0].isEmpty()) { final Caches caches = new Caches(); @@ -1979,11 +1979,11 @@ public class GenomeLayer { } if (paired) { - System.out.println("\rProcessed " + number_of_reads.get()*2 + " paired-end reads, " + number_of_reads.get() + " read pairs."); + System.out.println("\rProcessed " + number_of_reads.get()*2 + " paired-end reads, " + number_of_reads.get() + " read pairs."); } else { - System.out.println("\rProcessed " + number_of_reads.get() + " single-end reads"); + System.out.println("\rProcessed " + number_of_reads.get() + " single-end reads"); } - + total_unique = total_mapped = total_unmapped = 0; try { out = new BufferedWriter(new FileWriter(OUTPUT_PATH + "/mapping_summary.txt")); @@ -2004,7 +2004,7 @@ public class GenomeLayer { } } if (ALIGNMENT_MODE < 0) { // competitive mapping - sams[genome_numbers[0].size()+1].close(); + sams[genome_numbers[0].size()+1].close(); System.out.println("\n........................................"); if (paired) { System.out.print("+ Reads where none of the pair mapped: " + num_unmapped[genome_numbers[0].size()+1].get() + "\n"); @@ -2013,7 +2013,7 @@ public class GenomeLayer { } total_unmapped += num_unmapped[genome_numbers[0].size()+1].get(); out.write("Unmapped reads:\t\t\t" + num_unmapped[genome_numbers[0].size()+1].get() + "\n"); - } + } out.close(); } catch (IOException ex) { System.err.println(ex.getMessage()); @@ -2049,7 +2049,7 @@ public class GenomeLayer { /** * Read file provided via --genome-numbers or read genomes directly from command line via --reference - * @return + * @return */ public ArrayList<Integer>[] retrieve_genomes_to_map_against() { ArrayList<Integer>[] genome_numbers; @@ -2087,17 +2087,17 @@ public class GenomeLayer { } else { // --reference was included target_genome = target_genome.replace(" ",""); if (target_genome.endsWith(",")) { - target_genome = target_genome.replaceFirst(".$",""); // remove last character + target_genome = target_genome.replaceFirst(".$",""); // remove last character } - String[] temp_target_array = target_genome.split(","); + String[] temp_target_array = target_genome.split(","); ArrayList<Integer> target_genome_list = new ArrayList<>(); for (String genome_str : temp_target_array) { if (genome_str.contains("-")) { String[] genome_array = genome_str.split("-"); int start = Integer.parseInt(genome_array[0]); int end = Integer.parseInt(genome_array[1]); - for (int i= start; i <= end; i++) { - target_genome_list.add(i); + for (int i= start; i <= end; i++) { + target_genome_list.add(i); } } else { target_genome_list.add(Integer.parseInt(genome_str)); @@ -2121,12 +2121,12 @@ public class GenomeLayer { } return genome_numbers; } - + /** * Multiple quality checks on the data. * - Stops when line contains a space * - Stop if one of the files does not exist. - * - Gives a warning when the first line do not only contain A T C G N + * - Gives a warning when the first line do not only contain A T C G N */ public void verify_if_all_genome_files_exist() { if (PATH_TO_THE_GENOMES_FILE == null) { @@ -2139,7 +2139,7 @@ public class GenomeLayer { StringBuilder new_genomes_file = new StringBuilder(); // KMC requires Linux formatted textfile with newlines try (BufferedReader in = new BufferedReader(new FileReader(PATH_TO_THE_GENOMES_FILE))) { // first check if all files exist for (int c = 0; in.ready();) { - line_counter ++; + line_counter ++; if (line_counter % 100 == 0 || line_counter < 10) { System.out.print("\rVerifying if all input files exist: " + line_counter); } @@ -2170,14 +2170,14 @@ public class GenomeLayer { System.out.println("\rThe genome files for these on these lines were not found: " + not_found.toString().replace(" ","").replace("[","").replace("]","")); System.exit(1); } - + if (not_nucleotide.size() > 0) { System.out.println("\rThe first line of the genome files were checked for A, T, C, G, N." + "\nIn the following genomes, the first line contained other characters: " + not_nucleotide.toString().replace(" ","").replace("[","").replace("]","") ); } write_SB_to_file_full_path(new_genomes_file, PATH_TO_THE_GENOMES_FILE); // replace input file } - + /** * @param fasta_file file location * Reads the first line of a .fasta file to check if it's a nucleotide sequence @@ -2190,22 +2190,22 @@ public class GenomeLayer { String line = in.readLine().trim(); if (line.startsWith(">")) { // do nothing - } else { + } else { int position_counter = 0; - for (int i=0; i < line.length(); i++) { + for (int i=0; i < line.length(); i++) { char character = line.charAt(i); char fUpper = Character.toUpperCase(character); if (fUpper != 'A' && fUpper != 'T' && fUpper != 'C' && fUpper != 'G' && fUpper != 'N') { correct = false; } position_counter ++; - if (position_counter > 500) { + if (position_counter > 500) { // break the loop after the first 500 positions break; } } break; // break the loop after the first line (that is not the header) - } + } } } catch (IOException ioe) { System.out.println("Failed to read: " + fasta_file); @@ -2230,7 +2230,7 @@ public class GenomeLayer { return stream; } - + /** * Only KMC 2.3 and 3.0 (seem to) work. Stop when using another version */ @@ -2244,15 +2244,15 @@ public class GenomeLayer { BufferedReader reader = new BufferedReader(new InputStreamReader(p.getInputStream())); while ((line = reader.readLine()) != null) { exe_output.append(line).append("\n"); - } + } } catch (Exception e) { e.printStackTrace(); - } + } String log_str = exe_output.toString(); if (log_str.length() < 100) { System.out.println("KMC is not installed " + log_str.length()); System.exit(1); - } + } check_if_program_exists_stdout("kmc_tools", 100, "kmc_tools"); String[] log_array = log_str.split("\n"); if (!log_array[0].contains(" 3.0.") && !log_array[0].contains(" 2.3.")) { @@ -2260,7 +2260,7 @@ public class GenomeLayer { System.exit(1); } } - + /** * Constructs a pangenome (gDBG) for a set of genomes. * build_pangenome() @@ -2287,7 +2287,7 @@ public class GenomeLayer { construct_pangenome(pangenome_node); add_sequence_properties(); localize_nodes(); - + try (Transaction tx = graphDb.beginTx()) { pangenome_node.setProperty("k_mer_size", K_SIZE); pangenome_node.setProperty("num_k_mers", indexSc.length()); @@ -2299,7 +2299,7 @@ public class GenomeLayer { pangenome_node.setProperty("k_mer_highest_frequency", highest_frequency); tx.success(); } - + genome_overview(); System.out.println("\rNumber of kmers: " + indexSc.length()); System.out.println("Number of nodes: " + num_nodes); @@ -2312,7 +2312,7 @@ public class GenomeLayer { System.out.println("genome.db size: " + getFolderSize(new File(WORKING_DIRECTORY + GENOME_DATABASE_PATH)) + " MB\n"); graphDb.shutdown(); } - + /** * Adds new genomes to an available pangenome. */ @@ -2352,7 +2352,7 @@ public class GenomeLayer { add_sequence_properties(); localize_nodes(); genome_overview(); - + System.out.println("\rNumber of kmers: " + indexSc.length()); System.out.println("Number of nodes: " + num_nodes); System.out.println("Number of edges: " + num_edges); @@ -2372,20 +2372,20 @@ public class GenomeLayer { int file_counter = 0; while (exists){ file_counter ++; - exists = check_if_file_exists(WORKING_DIRECTORY + "log/added_genomes_" + file_counter + ".log"); + exists = check_if_file_exists(WORKING_DIRECTORY + "log/added_genomes_" + file_counter + ".log"); } - write_string_to_file_full_path("Increased the size of the database with " + (genomeDb.num_genomes-previous_num_genomes) + " genomes to a total of " + genomeDb.num_genomes, + write_string_to_file_full_path("Increased the size of the database with " + (genomeDb.num_genomes-previous_num_genomes) + " genomes to a total of " + genomeDb.num_genomes, WORKING_DIRECTORY + "log/added_genomes_" + file_counter + ".log"); - + disconnect_pangenome(); System.out.println("graph.db size: " + getFolderSize(new File(WORKING_DIRECTORY + GRAPH_DATABASE_PATH)) + " MB"); System.out.println("index.db size: " + getFolderSize(new File(WORKING_DIRECTORY + INDEX_DATABASE_PATH)) + " MB"); System.out.println("genome.db size: " + getFolderSize(new File(WORKING_DIRECTORY + GENOME_DATABASE_PATH)) + " MB\n"); } - + /** * Retrieves a selection of genomic regions from the genome database. - * + * */ public void retrieve_regions() { int genome_nr, sequence_nr, begin, end, num_regions = 0, proper_regions = 0; @@ -2394,11 +2394,11 @@ public class GenomeLayer { System.out.println("No --regions-file or -rf was provided.\n"); System.exit(1); } - + try { BufferedReader in = new BufferedReader(new FileReader(PATH_TO_THE_REGIONS_FILE)); while (in.ready()) { - String line = in.readLine().trim(); + String line = in.readLine().trim(); if (line.equals("")) { continue; } @@ -2409,7 +2409,7 @@ public class GenomeLayer { System.out.println("Failed to read: " + PATH_TO_THE_REGIONS_FILE + "\n"); System.exit(1); } - + connect_pangenome(); indexSc = new IndexScanner(indexDb); K_SIZE = indexSc.get_K(); @@ -2424,7 +2424,7 @@ public class GenomeLayer { try (BufferedReader in = new BufferedReader(new FileReader(PATH_TO_THE_REGIONS_FILE))) { BufferedWriter out = new BufferedWriter(new FileWriter(combi_output_file)); // file to combine the sequences while (in.ready()) { - String line = in.readLine().trim(); + String line = in.readLine().trim(); StringBuilder seq = new StringBuilder(); line = line.trim(); if (line.equals("")) { @@ -2433,7 +2433,7 @@ public class GenomeLayer { line_counter ++; String region_file = WORKING_DIRECTORY + "retrieval/regions/region_" + line_counter + ".fasta"; String[] fields = line.trim().split("\\s"); - + log_builder.append(line_counter).append(";").append(line).append(";"); switch (fields.length) { case 1: // a genome number is provided OR the row is not split correctly @@ -2448,7 +2448,7 @@ public class GenomeLayer { write_genome_file(genome_nr); } catch(NumberFormatException e) { log_builder.append("no, cannot be converted a number\n"); - System.out.println(line + " -> Unable to be converted to a genome number"); + System.out.println(line + " -> Unable to be converted to a genome number"); } break; case 2: // a genome number + sequence number is provided OR the row is not split correctly @@ -2468,17 +2468,17 @@ public class GenomeLayer { genomeSc.get_sub_sequence(seq, genome_nr, sequence_nr, begin, end - begin + 1, true); out.write(">genome" + genome_nr + "_sequence" + sequence_nr + "\n"); write_fasta(out, seq.toString(), 80); - + BufferedWriter region_writer = new BufferedWriter(new FileWriter(region_file)); region_writer.write(">genome" + genome_nr + "_sequence" + sequence_nr + "\n"); write_fasta(region_writer, seq.toString(), 80); region_writer.close(); proper_regions ++; } catch(NumberFormatException e) { - System.out.println(line + " -> Unable to be converted to a genome & sequence number"); + System.out.println(line + " -> Unable to be converted to a genome & sequence number"); } break; - case 4: case 5: + case 4: case 5: // 4: a genome number + sequence number + region start + region end position are provided // 5: all numbers AND the strand orientation is provided log_builder.append("genome,sequence, start coordinate, stop coordinate"); @@ -2495,37 +2495,37 @@ public class GenomeLayer { System.out.println(line + " -> Unable to correctly retrieve four numbers (Integers)"); continue; } - + if (!rr_check_if_appropriate_genome(genome_nr, log_builder, line)) { continue; } if (!rr_check_if_appropriate_sequence(genome_nr, sequence_nr, log_builder, line)) { continue; } - + long last_sequence_position = genomeDb.sequence_length[genome_nr][sequence_nr]; - if (begin < 1 || begin > last_sequence_position) { - System.out.println(line + " -> The start coordinate should be between 1 and " + + if (begin < 1 || begin > last_sequence_position) { + System.out.println(line + " -> The start coordinate should be between 1 and " + last_sequence_position + " for genome " + genome_nr); log_builder.append("no, the start coordinate should be between 1 and ").append(last_sequence_position) .append(" for genome ").append(genome_nr).append("\n"); continue; } - - if (end <= begin || end > genomeDb.sequence_length[genome_nr][sequence_nr]) { - System.out.println(line + " -> The end coordinate should be between " + (begin+1) + " and " + last_sequence_position + + if (end <= begin || end > genomeDb.sequence_length[genome_nr][sequence_nr]) { + System.out.println(line + " -> The end coordinate should be between " + (begin+1) + " and " + last_sequence_position + " for genome " + genome_nr); log_builder.append("no, the end coordinate should be between ").append((begin+1)).append(" and ") .append(last_sequence_position).append(" for genome ").append(genome_nr).append("\n"); continue; } - + log_builder.append("yes\n"); proper_regions ++; out.write(">genome" + genome_nr + "_sequence" + sequence_nr + "_" + begin + "_" + end + "\n"); begin -= 1; end -= 1; - + if (line.endsWith("-") || line.endsWith("rv")) { // manual only mentions '-' genomeSc.get_sub_sequence(seq, genome_nr, sequence_nr, begin, end - begin + 1, false); // reverse complement } else { @@ -2535,10 +2535,10 @@ public class GenomeLayer { BufferedWriter region_writer = new BufferedWriter(new FileWriter(region_file)); region_writer.write(">genome" + genome_nr + "_sequence" + sequence_nr + "_" + (begin+1) + "_" + (end+1) + "\n"); write_fasta(region_writer, seq.toString(), 80); - region_writer.close(); + region_writer.close(); break; - - default: + + default: log_builder.append("region not recognized\n"); break; } @@ -2550,13 +2550,13 @@ public class GenomeLayer { System.out.println(""); } System.out.println(proper_regions + " out of " + num_regions + " genomic regions found and retrieved successfully!\n\n" - + "Log written to:\n" + + "Log written to:\n" + " " + WORKING_DIRECTORY + "retrieval/regions/retrieve_regions.log\n" - + "\n" - + "Output written to:\n" + + "\n" + + "Output written to:\n" + " " + WORKING_DIRECTORY + "retrieval/\n" + " " + combi_output_file + "\n"); - + } catch (IOException ioe) { System.out.println("Failed to write: " + combi_output_file + "\n"); System.exit(1); @@ -2565,13 +2565,13 @@ public class GenomeLayer { } disconnect_pangenome(); } - + /** * Part of the retrieve_regions function, check if the genome number is correct * @param genome_nr * @param log_builder * @param line - * @return + * @return */ public boolean rr_check_if_appropriate_genome(int genome_nr, StringBuilder log_builder, String line) { if (genome_nr > total_genomes || genome_nr < 1) { @@ -2581,14 +2581,14 @@ public class GenomeLayer { } return true; } - + /** * Part of the retrieve_regions function, check if the sequence number is correct * @param genome_nr * @param sequence_nr * @param log_builder * @param line - * @return + * @return */ public boolean rr_check_if_appropriate_sequence(int genome_nr, int sequence_nr, StringBuilder log_builder, String line) { int allowed_sequence_nr = genomeDb.num_sequences[genome_nr]; @@ -2599,10 +2599,10 @@ public class GenomeLayer { } return true; } - + /** - * - * @param genome + * + * @param genome */ public void write_genome_file(int genome) { StringBuilder seq = new StringBuilder(); @@ -2620,15 +2620,15 @@ public class GenomeLayer { } catch (IOException e) { System.out.println("Unable to write a genome to " + WORKING_DIRECTORY + "/retrieval/genome_" + genome + ".fasta\n"); System.exit(1); - } + } } - + /** * Retrieves some complete genomes from the genome database. */ public void retrieve_genomes() { - System.out.println("\nAs of PanTools version 3.3, this functionality is integrated into 'retrieve_regions'.\n" + - "\nCheck the manual on how to run:\n\n" + + System.out.println("\nAs of PanTools version 3.3, this functionality is integrated into 'retrieve_regions'.\n" + + "\nCheck the manual on how to run:\n\n" + " pantools retrieve_regions --help\n"); System.exit(0); /* @@ -2702,8 +2702,8 @@ public class GenomeLayer { /** * Gives the outgoing relationship to take based on the query coordinate. - * - * @param current_node The current node to go out from. + * + * @param current_node The current node to go out from. * @param origin The string determining query genome and sequence, e.g. "G2S3" * @param pos The query position. * @return The proper outgoing edge. @@ -2719,7 +2719,7 @@ public class GenomeLayer { } return null; } - + /** * Appends substring s[from..to] to a given StringBuilder. * @param seq The given StringBuilder. @@ -2733,7 +2733,7 @@ public class GenomeLayer { seq.append(s.charAt(i)); return to - from + 1; } - + /** * Appends the reverse complement of substring s[from..to] to a given StringBuilder. * @param seq The given StringBuilder. @@ -2747,16 +2747,16 @@ public class GenomeLayer { seq.append(complement(s.charAt(i))); return to - from + 1; } - + /** - * + * * @param address An integer array lile {genome_number, sequence_number, begin_position, end_position} * @return A pointer to the genomic position in the pangenome */ - + /** * Finds a graph pointer pointing to the specified genomic coordinate in the pangenome. - * + * * @param graphDb The pangenome graph database * @param genomeSc The genome database * @param indexSc The index database @@ -2778,14 +2778,14 @@ public class GenomeLayer { pre_len = indexSc.get_pre_len(); kmer first_kmer = new kmer(k_size, pre_len); degenerate = false; - + if (position <= genomeSc.get_sequence_length(genome, sequence) - k_size) { for (i = 0; i < k_size && !degenerate; ++i) { code = genomeSc.get_code(genome, sequence, position + i); if (code < 4) first_kmer.next_fwd_kmer(code); else - degenerate = true; + degenerate = true; } if (!degenerate) { pointer = new IndexPointer(); @@ -2802,7 +2802,7 @@ public class GenomeLayer { anchor_nodes = (long[]) seq_node.getProperty("anchor_nodes"); anchor_positions = (int[]) seq_node.getProperty("anchor_positions"); anchor_sides = (String) seq_node.getProperty("anchor_sides"); - // Find the immediate preceding anchor_node, searching in the sorted array of anchor positions. + // Find the immediate preceding anchor_node, searching in the sorted array of anchor positions. for (low = 0, high = anchor_sides.length() - 1, mid = (low + high) / 2; low <= high; mid = (low + high) / 2) { if (genomic_pos < anchor_positions[mid]) { high = mid - 1; @@ -2839,10 +2839,10 @@ public class GenomeLayer { } return new IndexPointer(node.getId(), forward, forward ? genomic_pos - node_start_pos : node_len - 1 - (genomic_pos - node_start_pos), -1l); } - + /*** * Creates an edge from source node to destination node. - * + * * @param src The source node * @param des The destination node * @param edge_type One of the four possible edge types: FF, FR, RF, RR @@ -2851,11 +2851,11 @@ public class GenomeLayer { if (DEBUG) System.out.println("connect "+src.getId()+" "+edge_type.name()+" "+des.getId()); src.createRelationshipTo(des, edge_type); } - + /** - * Splits a node at a specified position by creating a new node called + * Splits a node at a specified position by creating a new node called * split_node as a part separated from the node. - * + * * @param node The node which should be split. * @param pos The split position with respect to the start of the node. * @return The newly created split node. @@ -2880,7 +2880,7 @@ public class GenomeLayer { split_node.setProperty("address", address); split_len = node_len - pos; split_node.setProperty("length", split_len); - + // Updates the 'start' edges comming from the gene node to the nucleotide node. Iterable<Relationship> relations = node.getRelationships(RelTypes.starts, Direction.INCOMING); for (Relationship r : relations) { @@ -2895,13 +2895,13 @@ public class GenomeLayer { rel.setProperty("forward", r.getProperty("forward")); rel.setProperty("genomic_position", r.getProperty("genomic_position")); r.delete(); - } - } - + } + } + // Updating the Kmers chain in the index node_last_kmer = indexSc.find(genomeSc.make_kmer(gen,seq,loc+pos-1)); split_first_kmer = indexSc.get_next_index(node_last_kmer); - indexSc.put_next_index(-1L, node_last_kmer); + indexSc.put_next_index(-1L, node_last_kmer); split_node.setProperty("first_kmer",split_first_kmer); split_node.setProperty("last_kmer",node.getProperty("last_kmer")); s_id=(int)split_node.getId(); @@ -2909,18 +2909,18 @@ public class GenomeLayer { { indexSc.put_node_id(s_id, inx); indexSc.put_offset(i, inx); - } + } // Moving forward-outgoing and reverse-incoming edges from node to split node. for (Relationship r : node.getRelationships(Direction.OUTGOING, RelTypes.FR, RelTypes.FF)) { neighbor = r.getEndNode(); - if (neighbor.equals(node)) + if (neighbor.equals(node)) neighbor = r.isType(RelTypes.FF) ? node : split_node; connect(split_node, neighbor, r.getType()); r.delete(); } for (Relationship r : node.getRelationships(Direction.INCOMING,RelTypes.RR,RelTypes.FR)) { neighbor = r.getStartNode(); - if (neighbor.equals(node)) + if (neighbor.equals(node)) neighbor = r.isType(RelTypes.RR) ? node : split_node; connect(neighbor, split_node, r.getType()); r.delete(); @@ -2938,7 +2938,7 @@ public class GenomeLayer { node.setProperty("length", pos + K_SIZE - 1); return split_node; } - + /** * Creates and extends a new node till reaching to a visited K-mer or a degenerate region. */ @@ -2955,7 +2955,7 @@ public class GenomeLayer { new_node.setProperty("length", K_SIZE); new_node.setProperty("last_kmer",genomeSc.get_curr_index()); new_node.setProperty("first_kmer",genomeSc.get_curr_index()); - // Set the pointer to the Kmer in the pointer database + // Set the pointer to the Kmer in the pointer database indexSc.put_pointer(node_id, 0, genomeSc.get_curr_kmer().get_canonical(), -1l, genomeSc.get_curr_index()); connect(curr_node ,new_node, RelTypes.values()[curr_side*2]); ++num_edges; @@ -2975,16 +2975,16 @@ public class GenomeLayer { genomeSc.next_position(); begin = genomeSc.get_position() - K_SIZE + 1; curr_node.setProperty("length", len); - curr_node.setProperty("last_kmer",last_kmer); + curr_node.setProperty("last_kmer",last_kmer); genomeSc.jump_forward(); if (genomeSc.get_position() >= genomeSc.get_sequence_length() - 1) { genomeSc.next_position();// to acheive the right length for the degenerate node finish = true; - } + } int[] add = genomeSc.get_address(); add[2] = begin; degenerate_node = create_degenerate(add); - connect(curr_node ,degenerate_node, RelTypes.FF); + connect(curr_node ,degenerate_node, RelTypes.FF); ++num_edges; curr_node = degenerate_node; break; @@ -3014,10 +3014,10 @@ public class GenomeLayer { } /** - * Finds, splits and follows (in forward direction) a node pointed by a + * Finds, splits and follows (in forward direction) a node pointed by a * pointer till the first k-mer different to what is observed in the scanned sequence. - * - * @param pointer The pointer + * + * @param pointer The pointer */ private void follow_forward(IndexPointer pointer) { int l, pos, begin, g, s, loc, side; @@ -3029,19 +3029,19 @@ public class GenomeLayer { node = graphDb.getNodeById(pointer.node_id); if (DEBUG) System.out.println("follow_forward "+pointer.node_id+" at "+pos); // The first split might be done to seperate the part we need to enter in. - if (pos > 0) { + if (pos > 0) { if (DEBUG) System.out.println("first_split "+node.getId()+" at "+pos); split_node1 = split(node, pos); - if (loop = (curr_node.equals(node) && curr_side == 0)) + if (loop = (curr_node.equals(node) && curr_side == 0)) src = split_node1; - else + else src = curr_node; node = split_node1; // Note : assigning reference variables is dangerous! if split_node changes node will change as well. } else { split_node1 = node; - if (loop = (curr_node.equals(node) && curr_side == 0)) + if (loop = (curr_node.equals(node) && curr_side == 0)) src = split_node1; - else + else src = curr_node; } des = split_node1; @@ -3055,14 +3055,14 @@ public class GenomeLayer { // Follow the shared part for (pos = 0; pos <= l && genomeSc.get_position() <= genomeSc.get_sequence_length() - 1 && genomeSc.get_code(g, s, loc + pos + K_SIZE - 1) == genomeSc.get_code(0); ++pos) { genomeSc.next_position(); - // If hit a degenarate region aplit and branch to a degenerate node + // If hit a degenarate region aplit and branch to a degenerate node if (genomeSc.get_position() <= genomeSc.get_sequence_length() - 1 && genomeSc.get_code(0) > 3) { begin = genomeSc.get_position() - K_SIZE + 1; genomeSc.jump_forward(); if (genomeSc.get_position() >= genomeSc.get_sequence_length() - 1) { genomeSc.next_position();// to acheive the right length for the degenerate node finish = true; - } + } if (pos + 1 <= l) { split_node2 = split(node, pos + 1); if (loop) @@ -3080,14 +3080,14 @@ public class GenomeLayer { finish = true; } else if (degenerate_node == null) // build the Kmer of difference initialize(genomeSc.get_position() - K_SIZE + 1); - // A second split might be needed + // A second split might be needed if (degenerate_node == null && pos <= l) { if (DEBUG) System.out.println("second_split "+node.getId()+" at "+pos); split_node2 = split(node, pos); if (loop) src = split_node2; } - // connect the current node before doing splits to the split_node1 + // connect the current node before doing splits to the split_node1 rel_type = RelTypes.values()[side]; repeated_edge = false; for (Relationship r: src.getRelationships(rel_type, Direction.OUTGOING)) @@ -3105,12 +3105,12 @@ public class GenomeLayer { } else curr_node = node; } - + /** - * Finds, splits and follows (in reverse direction) a node pointed by a + * Finds, splits and follows (in reverse direction) a node pointed by a * pointer till the first k-mer different to what is observed in the scanned sequence. - * - * @param pointer The pointer + * + * @param pointer The pointer */ private void follow_reverse(IndexPointer pointer) { int pos, begin, g, s, loc, side; @@ -3128,13 +3128,13 @@ public class GenomeLayer { split_node1 = split(node, pos+1); if (loop = curr_node.equals(node) && curr_side == 0) // might be in reverse side due to a follow reverse src = split_node1; - else + else src = curr_node; } else { split_node1 = node; if (loop = curr_node.equals(node) && curr_side == 0) src = split_node1; - else + else src = curr_node; } des = node; @@ -3152,7 +3152,7 @@ public class GenomeLayer { if (genomeSc.get_position() >= genomeSc.get_sequence_length() - 1) { genomeSc.next_position();// to acheive the right length for the degenerate node finish = true; - } + } if (pos > 0) { split_node2 = split(node, pos); des = split_node2; @@ -3198,7 +3198,7 @@ public class GenomeLayer { /** * Creates a degenerate node with a given address {genome, sequence, position}. - * + * * @param address The genomic position of the degenerate region */ private Node create_degenerate(int[] address) { @@ -3211,11 +3211,11 @@ public class GenomeLayer { degenerate_node.setProperty("length", genomeSc.get_position() - address[2]); return degenerate_node; } - + /** * Initializes the first kmer of the current sequence which can be possibly - * located immediately after a degenerate region. - * + * located immediately after a degenerate region. + * * @param start The start position. */ private void initialize(int start) { @@ -3224,20 +3224,20 @@ public class GenomeLayer { if (genomeSc.get_position() >= genomeSc.get_sequence_length() - 1) { genomeSc.next_position();// to acheive the right length for the degenerate node finish = true; - } + } int[] add = genomeSc.get_address(); add[2] = 0; degenerate_node = create_degenerate(add); connect(curr_node ,degenerate_node, RelTypes.values()[curr_side*2]); ++num_edges; - curr_node = degenerate_node; + curr_node = degenerate_node; } } - + /** * Constructs the pangenome starting from the pangenome node. * . - * @param pangenome_node The pangenome node + * @param pangenome_node The pangenome node */ void construct_pangenome(Node pangenome_node) { int trsc = 0; @@ -3264,7 +3264,7 @@ public class GenomeLayer { sequence_node.setProperty("offset", genomeSc.get_offset()); genome_node.createRelationshipTo(sequence_node, RelTypes.has); finish = false; - System.out.print("\rProcessing sequence " + genomeSc.get_sequence() + "/" + genomeDb.num_sequences[genomeSc.get_genome()] + + System.out.print("\rProcessing sequence " + genomeSc.get_sequence() + "/" + genomeDb.num_sequences[genomeSc.get_genome()] + " of genome " + genomeSc.get_genome() + "\tlength=" + genomeSc.get_sequence_length() + " "); curr_node = sequence_node; curr_side = 0; @@ -3283,10 +3283,10 @@ public class GenomeLayer { create_extend(); else if (genomeSc.get_curr_kmer().get_canonical() ^ pointer.canonical)// if sides don't agree follow_reverse(pointer); - else + else follow_forward(pointer); ++trsc; - if (trsc >= MAX_TRANSACTION_SIZE) { + if (trsc >= MAX_TRANSACTION_SIZE) { tx.success(); tx.close(); tx = graphDb.beginTx(); @@ -3296,21 +3296,21 @@ public class GenomeLayer { connect(curr_node, sequence_node, RelTypes.values()[curr_side*2]);// to point to the last k-mer of the sequence located in the other strand ++num_edges; } - System.out.println("\rProcessing sequence " + genomeSc.get_sequence() + "/" + genomeDb.num_sequences[genomeSc.get_genome()] + + System.out.println("\rProcessing sequence " + genomeSc.get_sequence() + "/" + genomeDb.num_sequences[genomeSc.get_genome()] + " of genome " + genomeSc.get_genome() + "\tlength=" + genomeSc.get_sequence_length() + " " ); genomeSc.next_sequence(); }//sequences System.out.println("\r" + (System.currentTimeMillis() - phaseTime) / 1000 + " seconds elapsed. "); - genomeSc.next_genome(); + genomeSc.next_genome(); }//genomes tx.success(); } finally { tx.close(); } } - + /** - * Traces each sequence in the pangenome and adds coordinate information + * Traces each sequence in the pangenome and adds coordinate information * to the edges and list of anchors information to sequence nodes. */ void localize_nodes() { @@ -3334,7 +3334,7 @@ public class GenomeLayer { int[] new_positions; int[] address = new int[3], addr = null; boolean is_node = false, is_degenerate = false, found = true; - try (Transaction tx = graphDb.beginTx()) { + try (Transaction tx = graphDb.beginTx()) { sequence_iterator = graphDb.findNodes(sequence_label); sequence_nodes = new LinkedList(); while (sequence_iterator.hasNext()) @@ -3369,7 +3369,7 @@ public class GenomeLayer { addr = (int[]) neighbor.getProperty("address"); neighbor_length = (int) neighbor.getProperty("length"); } - + if ((is_node && genomeSc.compare(address, addr, K_SIZE - 1, neighbor_side == 'F' ? K_SIZE - 1 : neighbor_length - K_SIZE, 1, neighbor_side == 'F')) || (is_degenerate && Arrays.equals(addr, address))) { @@ -3449,9 +3449,9 @@ public class GenomeLayer { } System.out.println(); } - + /** - * Extracts the sequence of the nodes from the genome database and store it + * Extracts the sequence of the nodes from the genome database and store it * in the nodes. */ void add_sequence_properties() { @@ -3472,7 +3472,7 @@ public class GenomeLayer { } Transaction tx = graphDb.beginTx(); try { - //num_bases = K - 1; // for the missed overlapped of the last node of each sequence which will not be stored + //num_bases = K - 1; // for the missed overlapped of the last node of each sequence which will not be stored while (!nodes.isEmpty()) { node = nodes.remove(); addr = (int[]) node.getProperty("address"); @@ -3499,9 +3499,9 @@ public class GenomeLayer { /** * Removes the given property from all the nucleotide and degenerate nodes. - * - * @param property - */ + * + * @param property + */ void drop_nodes_property(String property) { int i; ResourceIterator<Node> nodes_iterator; @@ -3528,7 +3528,7 @@ public class GenomeLayer { /** * Removes the coordinate information from the edges. - */ + */ void drop_edges_colors() { int i; ResourceIterator<Relationship> rels; @@ -3553,7 +3553,7 @@ public class GenomeLayer { /** * Calculates size of a given folder in MB. - * + * * @param dir The folder File object. * @return Size of the folder in MB */ -- GitLab