Skip to content
Snippets Groups Projects

Draft: Add WGS read reconstruction for ROI

Open Workum, Dirk-Jan van requested to merge add_wgs_roi into add_gfa_export
Compare and
16 files
+ 1447
57
Compare changes
  • Side-by-side
  • Inline
Files
16
@@ -2,14 +2,18 @@ package nl.wur.bif.pantools.analysis.region_analysis;
@@ -2,14 +2,18 @@ package nl.wur.bif.pantools.analysis.region_analysis;
import nl.wur.bif.pantools.Pantools;
import nl.wur.bif.pantools.Pantools;
import nl.wur.bif.pantools.utils.BeanUtils;
import nl.wur.bif.pantools.utils.BeanUtils;
 
import nl.wur.bif.pantools.utils.FileUtils;
 
import nl.wur.bif.pantools.utils.Globals;
import nl.wur.bif.pantools.utils.GraphUtils;
import nl.wur.bif.pantools.utils.GraphUtils;
import nl.wur.bif.pantools.utils.cli.mixins.SelectGenomes;
import nl.wur.bif.pantools.utils.cli.mixins.SelectGenomes;
 
import nl.wur.bif.pantools.utils.cli.mixins.ThreadNumber;
import nl.wur.bif.pantools.utils.cli.validation.Constraints.OutputDirectory;
import nl.wur.bif.pantools.utils.cli.validation.Constraints.OutputDirectory;
import picocli.CommandLine.*;
import picocli.CommandLine.*;
import java.nio.file.Path;
import java.nio.file.Path;
import java.util.concurrent.Callable;
import java.util.concurrent.Callable;
 
import static nl.wur.bif.pantools.utils.Globals.THREADS;
import static nl.wur.bif.pantools.utils.Globals.setGenomeSelectionOptions;
import static nl.wur.bif.pantools.utils.Globals.setGenomeSelectionOptions;
/**
/**
@@ -20,6 +24,7 @@ import static nl.wur.bif.pantools.utils.Globals.setGenomeSelectionOptions;
@@ -20,6 +24,7 @@ import static nl.wur.bif.pantools.utils.Globals.setGenomeSelectionOptions;
@OutputDirectory(directory = "outputPath")
@OutputDirectory(directory = "outputPath")
public class ExtractFunctionsCLI implements Callable<Integer> {
public class ExtractFunctionsCLI implements Callable<Integer> {
@Spec Model.CommandSpec spec;
@Spec Model.CommandSpec spec;
 
@Mixin private ThreadNumber threadNumber;
@ArgGroup private SelectGenomes selectGenomes;
@ArgGroup private SelectGenomes selectGenomes;
@ParentCommand
@ParentCommand
@@ -75,6 +80,19 @@ public class ExtractFunctionsCLI implements Callable<Integer> {
@@ -75,6 +80,19 @@ public class ExtractFunctionsCLI implements Callable<Integer> {
)
)
private boolean writeRepeats;
private boolean writeRepeats;
 
@Option(names = {"--reads-file"},
 
description = "Tab-delimited file with library names and corresponding read files. Default: none"
 
)
 
private Path readsFile;
 
 
@Option(names = {"--filter-high-frequency"},
 
defaultValue = "true",
 
negatable = true,
 
fallbackValue = "true",
 
description = "Filter out high-frequency k-mers for read reconstruction. Default: true"
 
)
 
private boolean filterHighFrequency;
 
//TODO: add --write-blast option to visualize BLASTP and BLASTN results
//TODO: add --write-blast option to visualize BLASTP and BLASTN results
private String[] functions;
private String[] functions;
@@ -103,20 +121,21 @@ public class ExtractFunctionsCLI implements Callable<Integer> {
@@ -103,20 +121,21 @@ public class ExtractFunctionsCLI implements Callable<Integer> {
GraphUtils.validateRepeats(); // check that repeats are present
GraphUtils.validateRepeats(); // check that repeats are present
}
}
// determine whether to use the fast option
// always use fast option, except when writing GFA or unitigs, or using read files
boolean fast = !writeGfa && !writeUnitigs;
boolean fast = !writeGfa && !writeUnitigs && readsFile == null;
if (!fast) {
if (!fast) {
Pantools.logger.warn("Using a slow method for extraction.");
Pantools.logger.warn("Using a slow method for extraction.");
}
}
final RegionExtractor regionExtractor = new RegionExtractor(outputPath);
final RegionExtractor regionExtractor = new RegionExtractor(outputPath);
regionExtractor.extractFunctions(functions, flanking, writeAnnotations, writeGfa, writeHomology, writeUnitigs, writeRepeats, fast);
regionExtractor.extractFunctions(functions, flanking, writeAnnotations, writeGfa, writeHomology, writeUnitigs, writeRepeats, FileUtils.parseReadsFile(readsFile), filterHighFrequency, fast);
return 0;
return 0;
}
}
private void setGlobalParameters() {
private void setGlobalParameters() {
setGenomeSelectionOptions(selectGenomes);
setGenomeSelectionOptions(selectGenomes);
 
THREADS = threadNumber.getnThreads();
}
}
private void crossValidate() {
private void crossValidate() {
@@ -126,6 +145,11 @@ public class ExtractFunctionsCLI implements Callable<Integer> {
@@ -126,6 +145,11 @@ public class ExtractFunctionsCLI implements Callable<Integer> {
throw new ParameterException(spec.commandLine(), "Functions must be GO, InterPro, Pfam or TIGRFAM");
throw new ParameterException(spec.commandLine(), "Functions must be GO, InterPro, Pfam or TIGRFAM");
}
}
}
}
 
 
// Reject K >= 64 when a reads file is given (original rationale: the 2bit version of
// k-mers is stored in a long). The error message states the same bound that the
// condition enforces: K = 63 is accepted, K = 64 and above is rejected.
if (readsFile != null && Globals.K_SIZE > 63) {
    throw new ParameterException(spec.commandLine(), "K should be smaller than 64 when using reads");
}
}
}
public Pantools getPantools() {
public Pantools getPantools() {
Loading