Skip to content
Snippets Groups Projects
Commit d64d124d authored by Workum, Dirk-Jan van's avatar Workum, Dirk-Jan van
Browse files

fixed proper path writing for GFA

parent db1c003b
Branches
No related tags found
1 merge request!147Add (sub)graph export (AKA region of interest)
Pipeline #68123 passed
......@@ -8,6 +8,8 @@ import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import static nl.wur.bif.pantools.pangenome.GenomeLayer.get_outgoing_edge;
......@@ -137,104 +139,110 @@ public abstract class GfaConverter {
* @param writer The writer to write to.
*/
private void writePaths(BufferedWriter writer) throws IOException {
// get all sequence nodes
Pantools.logger.info("Obtaining sequence nodes.");
final List<Node> sequenceNodeList = new ArrayList<>();
try (Transaction tx = Globals.GRAPH_DB.beginTx();
ResourceIterator<Node> sequenceNodes = Globals.GRAPH_DB.findNodes(Globals.SEQUENCE_LABEL)) {
while (sequenceNodes.hasNext()) {
sequenceNodeList.add(sequenceNodes.next());
}
tx.success();
} catch (NotFoundException nfe) {
Pantools.logger.error("Could not find sequence node.");
throw new RuntimeException("Could not find sequence node", nfe);
}
Pantools.logger.debug("Found {} sequence nodes: {}", sequenceNodeList.size(), sequenceNodeList);
// loop over all sequence nodes
Transaction tx = Globals.GRAPH_DB.beginTx();
try (ResourceIterator<Node> sequenceNodes = Globals.GRAPH_DB.findNodes(Globals.SEQUENCE_LABEL)) {
while (sequenceNodes.hasNext()) {
StringBuilder pathBuilder = new StringBuilder();
// get the sequence node
Node sequenceNode = sequenceNodes.next();
int genomeId = (int) sequenceNode.getProperty("genome");
int sequenceId = (int) sequenceNode.getProperty("number");
String origin = "G" + genomeId + "S" + sequenceId;
Pantools.logger.info("Writing path for sequence {} in genome {} ({}).", sequenceId, genomeId, origin);
// get the first nucleotide node (and check there is only one)
Node nucleotideNode = null;
for (Relationship edge : sequenceNode.getRelationships(Direction.OUTGOING)) {
if (edge.isType(Globals.RelTypes.FF) || edge.isType(Globals.RelTypes.FR)) {
if (nucleotideNode == null) {
nucleotideNode = edge.getEndNode();
} else {
Pantools.logger.error("Sequence node {} has more than one nucleotide node.", sequenceNode.getId());
throw new RuntimeException("Invalid sequence node");
}
for (Node sequenceNode : sequenceNodeList) {
final StringBuilder pathBuilder = new StringBuilder();
// get the sequence node
final int genomeId = (int) sequenceNode.getProperty("genome");
final int sequenceId = (int) sequenceNode.getProperty("number");
final String origin = "G" + genomeId + "S" + sequenceId;
Pantools.logger.info("Writing path for sequence {} in genome {} ({}).", sequenceId, genomeId, origin);
// get the first nucleotide node (and check there is only one)
Node nucleotideNode = null;
for (Relationship edge : sequenceNode.getRelationships(Direction.OUTGOING)) {
if (edge.isType(Globals.RelTypes.FF) || edge.isType(Globals.RelTypes.FR)) {
if (nucleotideNode == null) {
nucleotideNode = edge.getEndNode();
} else {
Pantools.logger.error("Sequence node {} has more than one nucleotide node.", sequenceNode.getId());
throw new RuntimeException("Invalid sequence node");
}
}
if (nucleotideNode == null) {
Pantools.logger.error("Sequence node {} has no nucleotide node.", sequenceNode.getId());
throw new RuntimeException("Invalid sequence node");
}
}
if (nucleotideNode == null) {
Pantools.logger.error("Sequence node {} has no nucleotide node.", sequenceNode.getId());
throw new RuntimeException("Invalid sequence node");
}
// walk the path from the first nucleotide node to the sequence node
int position = 0; // position in the sequence
int transactionSize = 0; // number of transitions
boolean finish = false; // flag to indicate we are done
while (!finish) {
// get the length of the current nucleotide node
int nodeLength = (int) nucleotideNode.getProperty("length");
// get the position of the correct outgoing edge
position += nodeLength - Globals.K_SIZE + 1;
// find the outgoing edge
Relationship edge = get_outgoing_edge(nucleotideNode, origin, position);
if (edge == null) { // check if we found the sequence node (which is at the end)
for (Relationship edge2 : nucleotideNode.getRelationships(Direction.OUTGOING)) {
if (edge2.isType(Globals.RelTypes.FF) || edge2.isType(Globals.RelTypes.FR)) {
if (edge2.getEndNode().equals(sequenceNode)) {
edge = edge2;
finish = true;
break;
}
}
// walk the path from the first nucleotide node to the sequence node
int position = 0; // position in the sequence
int transactionSize = 0; // number of transitions
boolean finish = false; // flag to indicate we are done
while (!finish) {
// get the length of the current nucleotide node
int nodeLength = (int) nucleotideNode.getProperty("length");
// get the position of the correct outgoing edge
position += nodeLength - Globals.K_SIZE + 1;
// find the outgoing edge
Relationship edge = get_outgoing_edge(nucleotideNode, origin, position);
if (edge == null) { // check if we found the sequence node (which is at the end)
for (Relationship edge2 : nucleotideNode.getRelationships(Direction.OUTGOING)) {
if (edge2.getEndNode().equals(sequenceNode)) {
edge = edge2;
finish = true;
break;
}
}
if (edge == null) {
Pantools.logger.error("Could not find outgoing edge for nucleotide node {}.", nucleotideNode.getId());
throw new RuntimeException("Invalid nucleotide node");
}
}
if (edge == null) {
Pantools.logger.error("Could not find outgoing edge for nucleotide node {}.", nucleotideNode.getId());
throw new RuntimeException("Invalid nucleotide node");
}
// get the next node (nucleotide node or sequence node)
Node nextNucleotideNode = edge.getEndNode();
char orientation = (edge.getType().name().charAt(0) == 'F') ? '+' : '-';
// get the next node (nucleotide node or sequence node)
Node nextNucleotideNode = edge.getEndNode();
char orientation = (edge.getType().name().charAt(0) == 'F') ? '+' : '-';
// add the nucleotide node to the path
pathBuilder.append(nucleotideNode.getId())
.append(orientation);
if (!finish) {
pathBuilder.append(PATH_SEPARATOR);
}
// add the nucleotide node to the path
pathBuilder.append(nucleotideNode.getId())
.append(orientation);
if (!finish) {
pathBuilder.append(PATH_SEPARATOR);
}
// log what we found
Pantools.logger.debug("Sequence: {}, Current node: {}, Current orientation: {}, Current position: {}.",
origin, nucleotideNode, orientation, position);
// log what we found
Pantools.logger.trace("Sequence: {}, Current node: {}, Current orientation: {}, Current position: {}.",
origin, nucleotideNode, orientation, position);
// set the next nucleotide node
nucleotideNode = nextNucleotideNode;
// set the next nucleotide node
nucleotideNode = nextNucleotideNode;
// commit the transaction if necessary
++transactionSize;
if (transactionSize >= Globals.MAX_TRANSACTION_SIZE) {
tx.success();
tx.close();
tx = Globals.GRAPH_DB.beginTx();
transactionSize = 0;
}
// commit the transaction if necessary
++transactionSize;
if (transactionSize >= Globals.MAX_TRANSACTION_SIZE) {
tx.success();
tx.close();
tx = Globals.GRAPH_DB.beginTx();
transactionSize = 0;
}
// write the path
writer.write(getPathOutputString(origin, pathBuilder));
writer.newLine();
}
tx.success();
tx.close();
} catch (NotFoundException nfe) {
Pantools.logger.error("Could not find node.");
throw new RuntimeException("Could not find node", nfe);
// write the path
writer.write(getPathOutputString(origin, pathBuilder));
writer.newLine();
}
tx.success();
tx.close();
}
/**
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment