Skip to content
Snippets Groups Projects
Commit 42bb0c7b authored by Moed, Matthijs
Browse files

More refactoring on explore_node().

parent a960b920
No related branches found
No related tags found
No related merge requests found
...@@ -741,84 +741,66 @@ public class GenomeLayer { ...@@ -741,84 +741,66 @@ public class GenomeLayer {
public void explore_node(Node node, int mate, int position, int read_len) { public void explore_node(Node node, int mate, int position, int read_len) {
final boolean is_canonical = current_kmer.get_canonical(); final boolean is_canonical = current_kmer.get_canonical();
//noinspection ConstantConditions if (isHighlyFrequent(getNodeFrequency(node)))
final long frequency = caches return;
.getNodeFrequencyCache()
.get(node, n -> (long) n.getProperty("frequency"));
if (isHighlyFrequent(frequency)) {
// for each incoming edge to the node of the anchor
final List<Relationship> incomingRelationships = caches
.getIncomingRelationshipsCache()
.get(node, n -> {
final List<Relationship> relationships = new ArrayList<>();
node
.getRelationships(Direction.INCOMING, RelTypes.FF, RelTypes.FR, RelTypes.RF, RelTypes.RR)
.forEach(relationships::add);
return relationships;
});
// TODO: fix dereference warning for (Relationship r: getIncomingRelationships(node)) {
for (Relationship r: incomingRelationships) { final char side = r.getType().name().charAt(1);
final char side = r.getType().name().charAt(1); // for all sequences passing that node
// for all sequences passing that node for (String seq_id: r.getPropertyKeys()) {
for (String seq_id: r.getPropertyKeys()) { final Address address = getAddress(seq_id);
final Address address = getAddress(seq_id); final int genome = address.getGenomeIndex();
final int genome = address.getGenomeIndex();
if (locations[mate][genome] != null) {// should map against this genome if (locations[mate][genome] != null) {// should map against this genome
final int sequence = address.getSequenceIndex(); final int sequence = address.getSequenceIndex();
// calculate the locations based on the offsets in the node // calculate the locations based on the offsets in the node
final int[] location_array = (int[])r.getProperty(seq_id); final int[] location_array = (int[])r.getProperty(seq_id);
final long seq_len = sequence_length[genome][sequence]; final long seq_len = sequence_length[genome][sequence];
if (side == 'F') { if (side == 'F') {
for (int j : location_array) { for (int j : location_array) {
if (pointer.canonical ^ is_canonical) { if (pointer.canonical ^ is_canonical) {
final int loc = j + pointer.offset - read_len + position + K; final int loc = j + pointer.offset - read_len + position + K;
if (loc >= 0 && loc <= seq_len - read_len) { if (loc >= 0 && loc <= seq_len - read_len) {
node_results.add(new int[]{genome, sequence, -(1 + loc), 1}); node_results.add(new int[]{genome, sequence, -(1 + loc), 1});
} }
} else { } else {
final int loc = j + pointer.offset - position; final int loc = j + pointer.offset - position;
if (loc >= 0 && loc <= seq_len - read_len) { if (loc >= 0 && loc <= seq_len - read_len) {
node_results.add(new int[]{genome, sequence, loc, 1}); node_results.add(new int[]{genome, sequence, loc, 1});
}
} }
} }
}else{ }
//noinspection ConstantConditions }else{
final int node_len = caches //noinspection ConstantConditions
.getNodeLengthCache() final int node_len = getNodeLength(node);
.get(node, n -> (int) n.getProperty("length"));
for (int j : location_array) { for (int j : location_array) {
if (pointer.canonical ^ is_canonical) { if (pointer.canonical ^ is_canonical) {
final int loc = j + node_len - K - pointer.offset - position; final int loc = j + node_len - K - pointer.offset - position;
if (loc >= 0 && loc <= seq_len - read_len) { if (loc >= 0 && loc <= seq_len - read_len) {
node_results.add(new int[]{genome, sequence, loc, -1}); node_results.add(new int[]{genome, sequence, loc, -1});
} }
} else { } else {
final int loc = j + node_len - pointer.offset - read_len + position; final int loc = j + node_len - pointer.offset - read_len + position;
if (loc >= 0 && loc <= seq_len - read_len) { if (loc >= 0 && loc <= seq_len - read_len) {
node_results.add(new int[]{genome, sequence, -(1 + loc), -1}); node_results.add(new int[]{genome, sequence, -(1 + loc), -1});
}
} }
} }
} }
} }
} }
} }
} }
} }
/** /**
* Test whether a node with a given frequency is highly-frequent. * Test whether a node with a given frequency is not highly-frequent.
* @param frequency frequency of the node. * @param frequency frequency of the node.
* @return true if the node is considered highly-frequent, false if not. * @return true if the node is considered highly-frequent, false if not.
*/ */
private boolean isHighlyFrequent(long frequency) { private boolean isHighlyFrequent(long frequency) {
// TODO: cast to int should probably be a cast to long // TODO: cast to int should probably be a cast to long
return frequency <= (int)(total_genomes_size / 10000000.0 + num_genomes * 5 * Math.log(total_genomes_size)); return frequency > (int)(total_genomes_size / 10000000.0 + num_genomes * 5 * Math.log(total_genomes_size));
} }
/** /**
...@@ -830,6 +812,51 @@ public class GenomeLayer { ...@@ -830,6 +812,51 @@ public class GenomeLayer {
return Address.fromRelationshipPropertyName(propertyName); return Address.fromRelationshipPropertyName(propertyName);
} }
/**
 * Looks up the frequency of a nucleotide node.
 *
 * <p>The node frequency cache is consulted first; on a miss the value is read from the node's
 * {@code "frequency"} property and handed to the cache via its loader function.
 *
 * @param node node to get frequency of.
 * @return node frequency.
 */
public long getNodeFrequency(Node node) {
    //noinspection ConstantConditions
    final long frequency = caches
        .getNodeFrequencyCache()
        .get(node, key -> (long) key.getProperty("frequency"));
    return frequency;
}
/**
 * Looks up the length of a nucleotide node (i.e. its sequence length).
 *
 * <p>The node length cache is consulted first; on a miss the value is read from the node's
 * {@code "length"} property and handed to the cache via its loader function.
 *
 * @param node node to get length of.
 * @return node length.
 */
public int getNodeLength(Node node) {
    //noinspection ConstantConditions
    final int length = caches
        .getNodeLengthCache()
        .get(node, key -> (int) key.getProperty("length"));
    return length;
}
/**
 * Return all incoming relationships of type FF, FR, RF and RR of a nucleotide node. Will attempt to retrieve
 * the relationships from a cache first and, if missing, retrieve them from Neo4j instead and store them in
 * the cache for later use.
 *
 * <p>On a cache miss the relationships are copied into a new {@link ArrayList} in iteration order.
 * NOTE(review): whether the cache can return {@code null} depends on the cache implementation, which is
 * not visible here - callers currently iterate the result without a null check.
 *
 * @param node node to get incoming relationships for.
 * @return all incoming relationships of type FF, FR, RF and RR.
 */
public List<Relationship> getIncomingRelationships(Node node) {
    return caches
        .getIncomingRelationshipsCache()
        .get(node, n -> {
            // Use the key handed to the loader instead of capturing the outer parameter, so the
            // loader depends only on its own argument (matches the other cache helpers).
            final List<Relationship> relationships = new ArrayList<>();
            n
                .getRelationships(Direction.INCOMING, RelTypes.FF, RelTypes.FR, RelTypes.RF, RelTypes.RR)
                .forEach(relationships::add);
            return relationships;
        });
}
/** /**
* Clusters all the candidate genomic locations based on their proximity and align the read to the candidate locations * Clusters all the candidate genomic locations based on their proximity and align the read to the candidate locations
* *
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment