diff --git a/INSTALL.4.compile b/INSTALL.4.compile deleted file mode 100644 index b8114bc8cd701d52a4666aaf56e6337993202c37..0000000000000000000000000000000000000000 --- a/INSTALL.4.compile +++ /dev/null @@ -1,3 +0,0 @@ -set -xeu - -make cnidaria jellyfish run diff --git a/INSTALL.md b/INSTALL.md index 31f2ac6f004e4a6b44ad3756985b499249784ac6..b6f8feb8b8eaa16855efb5e43d5376843c66d292 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -1,81 +1,46 @@ #Summary: -Cnidaria comes pre-compiled. No installation or compilation required. +**Cnidaria comes pre-compiled. No installation or compilation required.** -If you want to do so: -##For the impatient: -<pre> - bash INSTALL.1.python_system_requirements - bash INSTALL.2.python_requirements_globally - echo "export PATH=$PWD/scripts:$PATH" >> $HOME/.bashrc - export PATH=$PWD/scripts:$PATH - cnidaria.py -h -</pre> -##To Run on Ubuntu 12 (compilation required. run before previous commands): -<pre> -bash INSTALL.3.system_compile_requirements_ubuntu_12.04 -bash INSTALL.4.compile -</pre> -##To recompile on Ubuntu 14 (if you feel adventurous): -<pre> -bash INSTALL.3.system_compile_requirements -bash INSTALL.4.compile -</pre> - - -#For the patient: -##Before running -###Install system requirements (compulsory: requires root access) -<pre> -bash INSTALL.1.python_system_requirements -</pre> - -###Install python requirements in one of two modes: -####Globally (Preferable method. requires root access): -<pre> -bash INSTALL.2.python_requirements_globally -</pre> -####Locally (Alternative method. 
requires root access) -<pre> -bash INSTALL.2.python_requirements_locally -</pre> - - - - -##Running -###Environment +#Running +##Environment - add script folder to your PATH by: - type, from this folder, every time you open a new terminal -<pre> +``` export PATH=$PWD/scripts:$PATH -</pre> +``` - add (only once) cnidaria/scripts to your $HOME/.bashrc and restart (only once) your terminal -<pre> +``` echo "export PATH=$PWD/scripts:$PATH" >> $HOME/.bashrc -</pre> - +``` + -##Compiling Cnidaria from source (only if necessary. requires root access) -###Before compiling -#### Ubuntu 14+ -<pre> -bash INSTALL.3.system_compile_requirements -</pre> +# Compiling (!Not necessary!) +If you still want to do so: -#### Ubuntu 12 -<pre> -bash INSTALL.3.system_compile_requirements_ubuntu_12.04 -</pre> -###Compiling -<pre> -bash INSTALL.4.compile -</pre> +##To recompile on Ubuntu 12: +``` +bash INSTALL.md.4.system_compile_requirements_ubuntu_12.04 +bash INSTALL.md.1.compile +``` +##To recompile on Ubuntu 14: +``` +bash INSTALL.md.4.system_compile_requirements +bash INSTALL.md.1.compile +``` +then: +``` +bash INSTALL.md.2.python_system_requirements +bash INSTALL.md.3.python_requirements_globally +echo "export PATH=$PWD/scripts:$PATH" >> $HOME/.bashrc +export PATH=$PWD/scripts:$PATH +cnidaria.py -h +``` diff --git a/INSTALL.md.1.compile b/INSTALL.md.1.compile new file mode 100644 index 0000000000000000000000000000000000000000..ef9968fb68a0c1b8adf1d666fc3e0c3be7965783 --- /dev/null +++ b/INSTALL.md.1.compile @@ -0,0 +1,3 @@ +set -xeu + +make cnidaria run diff --git a/INSTALL.1.python_system_requirements b/INSTALL.md.2.python_system_requirements similarity index 100% rename from INSTALL.1.python_system_requirements rename to INSTALL.md.2.python_system_requirements diff --git a/INSTALL.2.python_requirements_globally b/INSTALL.md.3.python_requirements_globally similarity index 100% rename from INSTALL.2.python_requirements_globally rename to INSTALL.md.3.python_requirements_globally diff 
--git a/INSTALL.2.python_requirements_locally b/INSTALL.md.3.python_requirements_locally similarity index 100% rename from INSTALL.2.python_requirements_locally rename to INSTALL.md.3.python_requirements_locally diff --git a/INSTALL.3.system_compile_requirements b/INSTALL.md.4.system_compile_requirements similarity index 100% rename from INSTALL.3.system_compile_requirements rename to INSTALL.md.4.system_compile_requirements diff --git a/INSTALL.3.system_compile_requirements_ubuntu_12.04 b/INSTALL.md.4.system_compile_requirements_ubuntu_12.04 similarity index 100% rename from INSTALL.3.system_compile_requirements_ubuntu_12.04 rename to INSTALL.md.4.system_compile_requirements_ubuntu_12.04 diff --git a/Makefile b/Makefile index e512fd3c411f188f8c37fa89bc921f0c2c89816a..8fe479d8dd56f8385c81364c04859653b5d52747 100644 --- a/Makefile +++ b/Makefile @@ -1,17 +1,14 @@ -.PHONY: cnidaria jellyfish run test +.PHONY: cnidaria run test -all: cnidaria jellyfish run test +all: cnidaria run test clean: - $(MAKE) -C src clean + $(MAKE) -C src clean $(MAKE) -C test clean cnidaria: $(MAKE) -C src build -jellyfish: - $(MAKE) -C src jelly - run: cnidaria jellyfish scripts/cnidaria.py -h diff --git a/scripts/cnidaria.py b/scripts/cnidaria.py index b1316bcfd5b7cc14eb79b00de57c2f42feebd8d2..ae53eac14719949e01a81a5d90142b6e2a3cdc93 100755 --- a/scripts/cnidaria.py +++ b/scripts/cnidaria.py @@ -14,6 +14,8 @@ print basedir import cnidariapy print cnidariapy.fact(3) +cnidariapy.version() + #for i in `seq 1 20`; do # echo $i diff --git a/scripts/cnidaria_stats.py b/scripts/cnidaria_stats.py index ec4c10fae4e4df1cea4240616dd3baf33918f8ee..43ad38c4a10982daaf723033923e5dfd4707a6f3 100755 --- a/scripts/cnidaria_stats.py +++ b/scripts/cnidaria_stats.py @@ -28,100 +28,14 @@ from cogent.phylo import distance, nj, least_squares, maximum_likelihood from cogent.cluster.UPGMA import upgma from cogent.draw import dendrogram +print " cnidaria stats : importing stats" +import stats +methods_to_apply = [ 
"jaccard_dissimilarity" ] + +stats.init( methods_to_apply ) -############### -# DISTANCE METHODS -############### -#//Rand Index -#// exclusive A + exclusive B -#// ---------------------------------------- -#// exclusive A + 2x shared AB + exclusive B -#// -#//Jaccard Index -#// shared AB -#// ------------------------------------- -#// exclusive A + shared AB + exclusive B -#// -#//Fowlkes_mallows and Mallows -#// shared AB -#//-------------------------------------------------------- -#//sqrt(( shared AB * unique A ) * ( shared AB * unique B)) -#// -#//Mirkin Metric -#//2 * ( unique A + unique B ) -#// -#//Wallace -#//WAB = Shared AB -#// -------------------- -#// Unique A + Shared AB -#//WBA = Shared AB -#// -------------------- -#// Unique B + Shared AB - - -# 1 0 -# 1 a b -# 0 c d -# n = a + b + c + d - - -def jaccard_coefficient( totalX, totalY, countX, countY, val): - #M11 / (M01 + M10 + M11) - try: - r = ( float( val ) / (( countX + countY ) - val ) ) - except ZeroDivisionError: - print "jaccard_coefficient: DIVISION BY ZERO" - print totalX, totalY, countX, countY, val - sys.exit(0) - return r - -def jaccard_dissimilarity_sqrt( totalX, totalY, countX, countY, val, exclusiveXCount, exclusiveXTotal, exclusiveYCount, exclusiveYTotal, differenceExclusiveXYCount, sumSharedXY, differenceCountXY, differenceExclusiveXYTotal): - #sqrt( 1-Jindex ) - r = math.sqrt( jaccard_dissimilarity( totalX, totalY, countX, countY, val, exclusiveXCount, exclusiveXTotal, exclusiveYCount, exclusiveYTotal, differenceExclusiveXYCount, sumSharedXY, differenceCountXY, differenceExclusiveXYTotal ) ) - return r - -def jaccard_dissimilarity( totalX, totalY, countX, countY, val, exclusiveXCount, exclusiveXTotal, exclusiveYCount, exclusiveYTotal, differenceExclusiveXYCount, sumSharedXY, differenceCountXY, differenceExclusiveXYTotal): - #sqrt( 1-Jindex ) - r = 1 - jaccard_coefficient( totalX, totalY, countX, countY, val ) - return r - - -methods_available = { - #"jaccard_dissimilarity_sqrt": 
jaccard_dissimilarity_sqrt, - "jaccard_dissimilarity" : jaccard_dissimilarity -} - -def attachMethodName( methodName, func ): - print "attaching method", methodName - if methodName not in methods_available: - print "unknown method:", methodName - sys.exit(1) - - def ffunc(dissi, x, y, totalX, totalY, countX, countY, val): - #print "running attached function", methodName - exclusiveXCount = ( countX - val ) - exclusiveYCount = ( countY - val ) - - exclusiveXTotal = ( totalX - val ) - exclusiveYTotal = ( totalY - val ) - - differenceExclusiveXYCount = exclusiveXCount + exclusiveYCount - sumSharedXY = ( totalX - countX ) + ( totalY - countY ) - - differenceCountXY = ( countX + countY ) - differenceExclusiveXYTotal = ( totalX + totalY ) - - r = func( totalX, totalY, countX, countY, val, exclusiveXCount, exclusiveXTotal, exclusiveYCount, exclusiveYTotal, differenceExclusiveXYCount, sumSharedXY, differenceCountXY, differenceExclusiveXYTotal) - - #print "dissi method %-30s x %3d y %3d r %.5f totalX %12d totalY %12d countX %12d countY %12d val %12d" % ( methodName, x, y, r, totalX, totalY, countX, countY, val ) - dissi[ methodName ][x][y] += r - - return ffunc -for methodName in methods_available.keys(): - methods_available[ methodName ] = attachMethodName( methodName, methods_available[ methodName ] ) - ############### @@ -139,6 +53,7 @@ class statsfh(object): self.filetype = self.jfinst.getKey( "filetype" ) self.speciesNames = self.jfinst.getKey( "in_filenames" ) + self.num_kmers = self.jfinst.getKey( "in_filenames" ) self.speciesPosition = {} self.speciesCount = {} @@ -363,21 +278,21 @@ class statsfh(object): #sys.exit(0) -def calcDistance(stats, methods=methods_available.keys(), matrixValue="Valid", matrixType="Raw"): +def calcDistance(statistics, methods=stats.methods_enabled.keys(), matrixValue="Valid", matrixType="Raw"): #part, maxNameLen, diss, data print "CALCULATING DISTANCE ... creating empty ... MATRIX VALUE", matrixValue,"... 
MATRIX TYPE", matrixType - assert(matrixType in stats.matrix) + assert(matrixType in statistics.matrix) - names = stats.speciesNames - data = stats.matrix[matrixType] - analysisName = stats.scaleType - numSpps = stats.numSpps + names = statistics.speciesNames + data = statistics.matrix[matrixType] + analysisName = statistics.scaleType + numSpps = statistics.numSpps dissi = {} print "CALCULATING DISTANCE ... creating empty" for methodName in methods: - print "CALCULATING DISTANCE ... creating empty ... METHOD",methodName + print "CALCULATING DISTANCE ... creating empty ... METHOD", methodName dissi[ methodName ] = [ None ] * numSpps for x in xrange( numSpps ): @@ -389,20 +304,20 @@ def calcDistance(stats, methods=methods_available.keys(), matrixValue="Valid", m #print dissi print "CALCULATING DISTANCE ... converting" - print "CALCULATING DISTANCE ... converting ... ANALYSIS",analysisName,"... MATRIX VALUE", matrixValue,"... MATRIX TYPE", matrixType + print "CALCULATING DISTANCE ... converting ... ANALYSIS", analysisName, "... MATRIX VALUE", matrixValue, "... 
MATRIX TYPE", matrixType for x in xrange( numSpps ): - totalX = stats.speciesCount["Total" ][matrixType][ x ] - countX = stats.speciesCount[matrixValue][matrixType][ x ] + totalX = statistics.speciesCount["Total" ][matrixType][ x ] + countX = statistics.speciesCount[matrixValue][matrixType][ x ] for y in xrange( numSpps ): - totalY = stats.speciesCount["Total" ][matrixType][ y ] - countY = stats.speciesCount[matrixValue][matrixType][ y ] + totalY = statistics.speciesCount["Total" ][matrixType][ y ] + countY = statistics.speciesCount[matrixValue][matrixType][ y ] - val = stats.matrix[matrixType][x][y] + val = statistics.matrix[matrixType][x][y] for methodName in methods: - methodFunc = methods_available[methodName] + methodFunc = stats.methods_enabled[methodName] methodFunc(dissi, x, y, totalX, totalY, countX, countY, val) return dissi @@ -496,10 +411,10 @@ def dissi2dissimatrix(dissi, stats): -def fixTitles( titles, stats ): +def fixTitles( titles, statistics ): print "tree species" print "\t", - print "\n\t".join( sorted(stats.speciesPosition.keys()) ) + print "\n\t".join( sorted(statistics.speciesPosition.keys()) ) print print "file names" print "\t", @@ -507,16 +422,16 @@ def fixTitles( titles, stats ): print foundnames = [] - for fname in sorted(stats.speciesPosition): + for fname in sorted(statistics.speciesPosition): found = False for tname in titles: if tname in fname: fnewname = titles[ tname ] print " renaming", fname, "to", fnewname - pos = stats.speciesPosition[ fname ] - stats.speciesNames[ pos ] = fnewname - stats.speciesPosition[ fnewname ] = pos - del stats.speciesPosition[ fname ] + pos = statistics.speciesPosition[ fname ] + statistics.speciesNames[ pos ] = fnewname + statistics.speciesPosition[ fnewname ] = pos + del statistics.speciesPosition[ fname ] found = True break @@ -580,7 +495,7 @@ def exportMatrices(infile, matrices, stats): -def processBin( infile, filetitles=None, ignore_file=None, scaleType=statsfh.SCALE_NONE, 
methods=methods_available.keys(), matrixType="Raw" ): +def processBin( infile, filetitles=None, ignore_file=None, scaleType=statsfh.SCALE_NONE, methods=stats.methods_enabled.keys(), matrixType="Raw" ): if not os.path.exists(infile): print "input file %s does not exists" % infile sys.exit(1) @@ -597,7 +512,7 @@ def processBin( infile, filetitles=None, ignore_file=None, scaleType=statsfh.SCA print "READING FILE" - stats = statsfh(infile, scaleType=scaleType, ignore_file=ignore_file) + statistics = statsfh(infile, scaleType=scaleType, ignore_file=ignore_file) print "READING FILE ... done" @@ -605,31 +520,31 @@ def processBin( infile, filetitles=None, ignore_file=None, scaleType=statsfh.SCA print "PRINTING DISTANCE" if filetitles is not None: titles = readFilesTitles(filetitles) - fixTitles( titles, stats ) + fixTitles( titles, statistics ) - stats.saveCSV() + statistics.saveCSV() print "CALCULATING DISTANCE" - dissi = calcDistance( stats, methods=methods, matrixType=matrixType ) + dissi = calcDistance( statistics, methods=methods, matrixType=matrixType ) print "CALCULATING DISTANCE ... done" - printDissi( stats, dissi, infile ) + printDissi( statistics, dissi, infile ) print "PRINTING DISTANCE .. done" print "CONVERTING MATRIX" - matrices = dissi2dissimatrix(dissi, stats) + matrices = dissi2dissimatrix(dissi, statistics) print "CONVERTING MATRIX ... done" print "EXPORTING NJ" - exportMatrices(infile, matrices, stats) + exportMatrices(infile, matrices, statistics) print "EXPORTING NJ ... 
done" @@ -662,7 +577,7 @@ def main(): print "no ignore file given" - methods = methods_available.keys() + methods = stats.methods_enabled.keys() processBin( infile, filetitles=filetitles, ignore_file=ignore_file, scaleType=statsfh.SCALE_NONE , methods=methods ) diff --git a/scripts/stats.py b/scripts/stats.py new file mode 100755 index 0000000000000000000000000000000000000000..be0011315753aa2c194d51eeff41ce0f8376bd7b --- /dev/null +++ b/scripts/stats.py @@ -0,0 +1,138 @@ +############### +# DISTANCE METHODS +############### +#//Rand Index +#// exclusive A + exclusive B +#// ---------------------------------------- +#// exclusive A + 2x shared AB + exclusive B +#// +#//Jaccard Index +#// shared AB +#// ------------------------------------- +#// exclusive A + shared AB + exclusive B +#// +#//Fowlkes_mallows and Mallows +#// shared AB +#//-------------------------------------------------------- +#//sqrt(( shared AB * unique A ) * ( shared AB * unique B)) +#// +#//Mirkin Metric +#//2 * ( unique A + unique B ) +#// +#//Wallace +#//WAB = Shared AB +#// -------------------- +#// Unique A + Shared AB +#//WBA = Shared AB +#// -------------------- +#// Unique B + Shared AB + + +# 1 0 +# 1 a b +# 0 c d +# n = a + b + c + d + + +def jaccard_coefficient( totalX, totalY, countX, countY, val): + #M11 / (M01 + M10 + M11) + try: + r = ( float( val ) / (( countX + countY ) - val ) ) + except ZeroDivisionError: + print "jaccard_coefficient: DIVISION BY ZERO" + print totalX, totalY, countX, countY, val + sys.exit(0) + return r + +def jaccard_dissimilarity_sqrt( totalX, totalY, countX, countY, val, exclusiveXCount, exclusiveXTotal, exclusiveYCount, exclusiveYTotal, differenceExclusiveXYCount, sumSharedXY, differenceCountXY, differenceExclusiveXYTotal): + #sqrt( 1-Jindex ) + r = math.sqrt( jaccard_dissimilarity( totalX, totalY, countX, countY, val, exclusiveXCount, exclusiveXTotal, exclusiveYCount, exclusiveYTotal, differenceExclusiveXYCount, sumSharedXY, differenceCountXY, 
differenceExclusiveXYTotal ) ) + return r + +def jaccard_dissimilarity( totalX, totalY, countX, countY, val, exclusiveXCount, exclusiveXTotal, exclusiveYCount, exclusiveYTotal, differenceExclusiveXYCount, sumSharedXY, differenceCountXY, differenceExclusiveXYTotal): + #sqrt( 1-Jindex ) + r = 1 - jaccard_coefficient( totalX, totalY, countX, countY, val ) + return r + + + +def pearson_chi_squared( totalX, totalY, countX, countY, val, exclusiveXCount, exclusiveXTotal, exclusiveYCount, exclusiveYTotal, differenceExclusiveXYCount, sumSharedXY, differenceCountXY, differenceExclusiveXYTotal): + #sqrt( 1-Jindex ) + + try: + s = 0 + #( float( val ) / (( countX + countY ) - val ) ) + + except ZeroDivisionError: + print "jaccard_coefficient: DIVISION BY ZERO" + print totalX, totalY, countX, countY, val + sys.exit(0) + + r = 1 - s + + return r + + +def pearson_I( totalX, totalY, countX, countY, val, exclusiveXCount, exclusiveXTotal, exclusiveYCount, exclusiveYTotal, differenceExclusiveXYCount, sumSharedXY, differenceCountXY, differenceExclusiveXYTotal): + r = pearson_chi_squared( totalX, totalY, countX, countY, val, exclusiveXCount, exclusiveXTotal, exclusiveYCount, exclusiveYTotal, differenceExclusiveXYCount, sumSharedXY, differenceCountXY, differenceExclusiveXYTotal) + + return r + + +def pearson_II( totalX, totalY, countX, countY, val, exclusiveXCount, exclusiveXTotal, exclusiveYCount, exclusiveYTotal, differenceExclusiveXYCount, sumSharedXY, differenceCountXY, differenceExclusiveXYTotal): + q2 = pearson_chi_squared( totalX, totalY, countX, countY, val, exclusiveXCount, exclusiveXTotal, exclusiveYCount, exclusiveYTotal, differenceExclusiveXYCount, sumSharedXY, differenceCountXY, differenceExclusiveXYTotal) + n = exclusiveXCount + exclusiveYCount + r = r#q2 / + + return r + + +def attachMethodName( methodName, func ): + print "attaching method", methodName + if methodName not in methods_enabled: + print "unknown method:", methodName + sys.exit(1) + + def ffunc(dissi, x, y, 
totalX, totalY, countX, countY, val): + #print "running attached function", methodName + exclusiveXCount = ( countX - val ) + exclusiveYCount = ( countY - val ) + + exclusiveXTotal = ( totalX - val ) + exclusiveYTotal = ( totalY - val ) + + differenceExclusiveXYCount = exclusiveXCount + exclusiveYCount + sumSharedXY = ( totalX - countX ) + ( totalY - countY ) + + differenceCountXY = ( countX + countY ) + differenceExclusiveXYTotal = ( totalX + totalY ) + + r = func( totalX, totalY, countX, countY, val, exclusiveXCount, exclusiveXTotal, exclusiveYCount, exclusiveYTotal, differenceExclusiveXYCount, sumSharedXY, differenceCountXY, differenceExclusiveXYTotal) + + #print "dissi method %-30s x %3d y %3d r %.5f totalX %12d totalY %12d countX %12d countY %12d val %12d" % ( methodName, x, y, r, totalX, totalY, countX, countY, val ) + dissi[ methodName ][x][y] += r + + return ffunc + + +def init( methods_to_apply ): + for m in methods_to_apply: + if m not in methods_available: + print " unkknown method %s" % m + sys.exit(1) + + print "enabling %s method" % m + + methods_enabled[m] = methods_available[m] + + for methodName in methods_enabled.keys(): + methods_enabled[ methodName ] = attachMethodName( methodName, methods_enabled[ methodName ] ) + + +methods_available = { + "jaccard_dissimilarity_sqrt": jaccard_dissimilarity_sqrt, + "jaccard_dissimilarity" : jaccard_dissimilarity +} + +methods_enabled = {} + diff --git a/src/_cnidariapy.so b/src/_cnidariapy.so index 0252e4a20f7db2a6ecc44b6a011a9edefb2b12eb..054ac6f87cc099041db708b629b1b20582ef54b8 100755 Binary files a/src/_cnidariapy.so and b/src/_cnidariapy.so differ diff --git a/src/build/_cnidariapy.so b/src/build/_cnidariapy.so index 0252e4a20f7db2a6ecc44b6a011a9edefb2b12eb..054ac6f87cc099041db708b629b1b20582ef54b8 100755 Binary files a/src/build/_cnidariapy.so and b/src/build/_cnidariapy.so differ diff --git a/src/build/cnidariapy.py b/src/build/cnidariapy.py index 
c795861a85b0d06f2d479fe01d419e49868c8222..3592309ba42d1118233bdc1e4151086a1fc82076 100644 --- a/src/build/cnidariapy.py +++ b/src/build/cnidariapy.py @@ -174,6 +174,10 @@ def fact(*args): return _cnidariapy.fact(*args) fact = _cnidariapy.fact +def version(): + return _cnidariapy.version() +version = _cnidariapy.version + def openoutfile(*args): return _cnidariapy.openoutfile(*args) openoutfile = _cnidariapy.openoutfile diff --git a/src/cnidariapy.py b/src/cnidariapy.py index c795861a85b0d06f2d479fe01d419e49868c8222..3592309ba42d1118233bdc1e4151086a1fc82076 100644 --- a/src/cnidariapy.py +++ b/src/cnidariapy.py @@ -174,6 +174,10 @@ def fact(*args): return _cnidariapy.fact(*args) fact = _cnidariapy.fact +def version(): + return _cnidariapy.version() +version = _cnidariapy.version + def openoutfile(*args): return _cnidariapy.openoutfile(*args) openoutfile = _cnidariapy.openoutfile diff --git a/src/headers/cnidariapy.i b/src/headers/cnidariapy.i index 24aee40fa5b7f2518ae895072460de55f671b559..0ed76680f5d7dc370af275600b7f659c61b6771a 100644 --- a/src/headers/cnidariapy.i +++ b/src/headers/cnidariapy.i @@ -12,6 +12,7 @@ namespace std { /* Includes the header in the wrapper code */ #include "shared.hpp" #include "cnidaria.hpp" + using namespace cnidaria; %} diff --git a/src/libs/jellyfish-2.1.3/.libs/libjellyfish-2.0.a b/src/libs/jellyfish-2.1.3/.libs/libjellyfish-2.0.a index 093a5f6ae39976c51867780be6cb3b05e69d511a..ae7703f2e365791f1bbcd9965d33b76166798800 100644 Binary files a/src/libs/jellyfish-2.1.3/.libs/libjellyfish-2.0.a and b/src/libs/jellyfish-2.1.3/.libs/libjellyfish-2.0.a differ diff --git a/src/libs/jellyfish-2.1.3/.libs/libjellyfish-2.0.lai b/src/libs/jellyfish-2.1.3/.libs/libjellyfish-2.0.lai index 01d5ea47128d1651f23a6e514d0b78ebce793358..485af3ffe3859d818669061160c02853607c5c11 100644 --- a/src/libs/jellyfish-2.1.3/.libs/libjellyfish-2.0.lai +++ b/src/libs/jellyfish-2.1.3/.libs/libjellyfish-2.0.lai @@ -17,7 +17,7 @@ old_library='libjellyfish-2.0.a' 
inherited_linker_flags='' # Libraries that this one depends upon. -dependency_libs=' -L/home/aflit001/lib' +dependency_libs='' # Names of additional weak libraries provided by this library weak_library_names='' diff --git a/src/libs/jellyfish-2.1.3/.libs/libjellyfish-2.0.so.2.0.0 b/src/libs/jellyfish-2.1.3/.libs/libjellyfish-2.0.so.2.0.0 index 577376e20297bdad3ca7df50f86509e4e4b4cf8a..4f561beffd0194f715191e5e2e9d2e1f0474b7d3 100755 Binary files a/src/libs/jellyfish-2.1.3/.libs/libjellyfish-2.0.so.2.0.0 and b/src/libs/jellyfish-2.1.3/.libs/libjellyfish-2.0.so.2.0.0 differ diff --git a/src/libs/jellyfish-2.1.3/bin/.libs/jellyfish b/src/libs/jellyfish-2.1.3/bin/.libs/jellyfish index 3f6a9242df26104ffa1f7860d1563414d463c273..93c61963641331a0e77b4d305937eb320d864999 100755 Binary files a/src/libs/jellyfish-2.1.3/bin/.libs/jellyfish and b/src/libs/jellyfish-2.1.3/bin/.libs/jellyfish differ diff --git a/src/libs/jellyfish-2.1.3/bin/jellyfish b/src/libs/jellyfish-2.1.3/bin/jellyfish index 28ba23cec1d2dd8ff39570768751ba94badee6af..ff3a9badf63a5024a93d4f7fc3f3358da7528c2e 100755 Binary files a/src/libs/jellyfish-2.1.3/bin/jellyfish and b/src/libs/jellyfish-2.1.3/bin/jellyfish differ diff --git a/src/libs/jellyfish-2.1.3/libtool b/src/libs/jellyfish-2.1.3/libtool index d5ba9096823b5def2470a2055cf1376cba007b2d..0000a7128c99c972432124043184bd0e644b2015 100755 --- a/src/libs/jellyfish-2.1.3/libtool +++ b/src/libs/jellyfish-2.1.3/libtool @@ -275,7 +275,7 @@ hardcode_into_libs=yes sys_lib_search_path_spec="/usr/lib/gcc/x86_64-linux-gnu/4.8 /usr/lib/x86_64-linux-gnu /usr/lib /lib/x86_64-linux-gnu /lib " # Run-time system search path for libraries. 
-sys_lib_dlsearch_path_spec="/lib64 /usr/lib64 /lib /usr/lib /usr/lib/x86_64-linux-gnu/libfakeroot /lib/i386-linux-gnu /usr/lib/i386-linux-gnu /lib/i686-linux-gnu /usr/lib/i686-linux-gnu /usr/local/lib /lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu/mesa-egl /usr/lib/nvidia-331-updates /usr/lib32/nvidia-331-updates /lib32 /usr/lib32 /libx32 /usr/libx32 " +sys_lib_dlsearch_path_spec="/lib64 /usr/lib64 /lib /usr/lib /usr/lib/x86_64-linux-gnu/libfakeroot /lib/i386-linux-gnu /usr/lib/i386-linux-gnu /lib/i686-linux-gnu /usr/lib/i686-linux-gnu /usr/local/lib /lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu/mesa-egl /usr/lib/nvidia-340-updates /usr/lib32/nvidia-340-updates /lib32 /usr/lib32 /libx32 /usr/libx32 " # Whether dlopen is supported. dlopen_support=unknown diff --git a/src/src_cpp/cnidaria.cpp b/src/src_cpp/cnidaria.cpp index 8eb08b2ed583538cc659a7dfd15f29fb3717e229..cd331a3806c050de14ad22de5716eebe5059d078 100644 --- a/src/src_cpp/cnidaria.cpp +++ b/src/src_cpp/cnidaria.cpp @@ -16,6 +16,12 @@ int fact (int n) return n * fact(n-1); } } +void version () + { + std::cout << "cnidaria version: " << __CNIDARIA_VERSION__ << "\n" + << "build date : " << "Sep 29 2015" << "\n" + << "build time : " << "19:35:57" << std::endl; +} namespace cnidaria { void openoutfile (std::ofstream & outfile_, string_t filename) @@ -234,8 +240,9 @@ namespace cnidaria exit(1); } - hda_g.num_pieces = 1; - hda_g.piece_num = 0; + hda_g.num_pieces = 1; + hda_g.piece_num = 0; + hda_g.complete_registers = num_registers; hda_g.print(); std::cout << " finished merging successfully. saving header" << std::endl; @@ -479,8 +486,9 @@ namespace cnidaria exit(1); } - hda_g.num_pieces = 1; - hda_g.piece_num = 0; + hda_g.num_pieces = 1; + hda_g.piece_num = 0; + hda_g.complete_registers = num_registers; hda_g.print(); std::cout << " finished merging successfully. 
saving header" << std::endl; @@ -543,6 +551,7 @@ namespace cnidaria baseint_vec_t num_kmer_total_spp; baseint_vec_t num_kmer_valid_spp; j_matrix_s_vec_t j_matrices; + baseInt sumRegisters = 0; hda.infiles = &infiles; @@ -591,7 +600,7 @@ namespace cnidaria std::cout << "opening out file " << out_file + EXT_MATRIX << std::endl; openoutfile( oufile_, out_file + EXT_MATRIX ); - cnidaria_header_rw hd_g = cnidaria_header_rw (); + cnidaria_header_rw hd_g = cnidaria_header_rw (); header_data hda_g; string_vec_t infiles_g; @@ -621,7 +630,7 @@ namespace cnidaria std::cout << " parsing in file " << std::endl; header_data hda; - cnidaria_header_rw hd = cnidaria_header_rw (); + cnidaria_header_rw hd = cnidaria_header_rw (); string_vec_t infiles; string_vec_t srcfiles; @@ -782,7 +791,7 @@ namespace cnidaria { piece_data::piece_data (string_vec_t & srcfiles_, string_t & out_file_, baseInt num_threads_, baseInt minVal_, baseInt save_every_, bool export_complete_, bool export_summary_, bool export_matrix_, baseInt num_pieces_, baseInt piece_num_) : srcfiles (srcfiles_), out_file (out_file_), num_threads (num_threads_), minVal (minVal_), save_every (save_every_), export_complete (export_complete_), export_summary (export_summary_), export_matrix (export_matrix_), num_pieces (num_pieces_), piece_num (piece_num_) - { + { merger = new merge_jfs( srcfiles_, out_file_ ); locker = new boost::recursive_mutex; } @@ -805,6 +814,11 @@ namespace cnidaria std::cout << "waiting for threads" << std::endl; tp.wait(); std::cout << "threads finished" << std::endl; + std::cout << "sent all pieces" << std::endl; + for ( baseInt piece_num = 0; piece_num < data.size(); ++piece_num ) { + std::cout << "piece" << piece_num << " = " + << "gCounter" << data[piece_num].merger->get_complete_registers() << std::endl; + } } } namespace cnidaria @@ -827,7 +841,7 @@ namespace cnidaria data.merger->run( data.locker ); - std::cout << "saving" << std::endl; + std::cout << "piece sent gCounter: " << 
data.merger->get_complete_registers() << std::endl; } } namespace cnidaria @@ -838,6 +852,9 @@ namespace cnidaria send_piece( d ); + std::cout << "saving" << std::endl; + std::cout << "data sent gCounter: " << d.merger->get_complete_registers() << std::endl; + d.merger->save_all( out_file ); } } diff --git a/src/src_cpp/cnidaria.hpp b/src/src_cpp/cnidaria.hpp index 96d7e93b016f761b6ed8192787851e6d7d2f33b6..0f3129d59132b5a7ff7ef67f73e0a2b2950774ff 100644 --- a/src/src_cpp/cnidaria.hpp +++ b/src/src_cpp/cnidaria.hpp @@ -38,6 +38,7 @@ #define LZZ_INLINE inline int fact (int n); +void version (); namespace cnidaria { void openoutfile (std::ofstream & outfile_, string_t filename); diff --git a/src/src_cpp/cnidaria_methods.cpp b/src/src_cpp/cnidaria_methods.cpp index 63aced321220a0dc0b3423be49c985e4d28c072f..fca745cd3a22a3f1da57fb926c3ac4fcd00a8aae 100644 --- a/src/src_cpp/cnidaria_methods.cpp +++ b/src/src_cpp/cnidaria_methods.cpp @@ -82,6 +82,18 @@ namespace cnidaria } } namespace cnidaria +{ + void cnidaria_db::set_complete_registers (baseInt cr) + { complete_registers = cr; + } +} +namespace cnidaria +{ + baseInt cnidaria_db::get_complete_registers () + { return complete_registers; + } +} +namespace cnidaria { void cnidaria_db::set_export_summary (bool s) { export_summary = s; @@ -1002,6 +1014,12 @@ namespace cnidaria } } namespace cnidaria +{ + baseInt merge_jfs::get_complete_registers () + { return hash_table.get_complete_registers(); + } +} +namespace cnidaria { void merge_jfs::set_save_every (baseInt pe) { save_every = pe; @@ -1044,6 +1062,18 @@ namespace cnidaria } } namespace cnidaria +{ + void merge_jfs::set_complete_registers (baseInt cr) + { hash_table.set_complete_registers( cr ); + } +} +namespace cnidaria +{ + void merge_jfs::append_complete_registers (baseInt cr) + { hash_table.set_complete_registers( get_complete_registers() + cr ); + } +} +namespace cnidaria { void merge_jfs::set_export_summary (bool s) { hash_table.set_export_summary( s ); 
export_summary = s; @@ -1182,6 +1212,7 @@ namespace cnidaria void merge_jfs::run (boost::recursive_mutex * g_guard_s) { updateHeaderData(); + std::cout << "starting phylogenomics with " << num_threads << " threads" << std::endl; std::cout << "starting " << num_threads << " threads" << std::endl; @@ -1213,9 +1244,13 @@ namespace cnidaria ); } - std::cout << "waiting for threads" << std::endl; + std::cout << "waiting for sub threads" << std::endl; tp.wait(); - std::cout << "threads finished" << std::endl; + std::cout << "sub threads finished" + << "gCounter " << gCounter << std::endl; + + + append_complete_registers(gCounter); } } namespace cnidaria @@ -1234,8 +1269,8 @@ namespace cnidaria - baseInt num_threadsL = this->num_pieces * this->num_threads; - baseInt thread_numL = ( this->piece_num * this->num_threads ) + thread_num; + baseInt num_threadsL = this->num_pieces * this->num_threads; + baseInt thread_numL = ( this->piece_num * this->num_threads ) + thread_num; string_t name = (boost::format("merge jfs pc %3d/%3d thr %3d/%3d") % (piece_num+1) % this->num_pieces % (thread_num + 1) % this->num_threads).str(); diff --git a/src/src_cpp/cnidaria_methods.hpp b/src/src_cpp/cnidaria_methods.hpp index ca91ea4ff1b39dab24b0964fe5982eabe4fa50f9..9777cac48be741abed1c9a53b61998dd78913437 100644 --- a/src/src_cpp/cnidaria_methods.hpp +++ b/src/src_cpp/cnidaria_methods.hpp @@ -50,6 +50,8 @@ namespace cnidaria void set_max_val (double ma); void set_min_val (baseInt mi); void set_max_val (baseInt ma); + void set_complete_registers (baseInt cr); + baseInt get_complete_registers (); void set_export_summary (bool s); void set_export_matrix (bool s); void set_export_complete (header_data & hda, bool s, string_t filename = ""); @@ -203,6 +205,7 @@ namespace cnidaria static baseInt sCounter; merge_jfs (string_vec_t & srcfilesl, string_t basenamel = "cnidaria_db"); void init (); + baseInt get_complete_registers (); void set_save_every (baseInt pe); void set_num_pieces (baseInt np); void 
set_piece_num (baseInt pn); @@ -210,6 +213,8 @@ namespace cnidaria void set_max_val (double ma); void set_min_val (baseInt mi); void set_max_val (baseInt ma); + void set_complete_registers (baseInt cr); + void append_complete_registers (baseInt cr); void set_export_summary (bool s); void set_export_matrix (bool s); void set_export_complete (bool s, string_t filename = ""); diff --git a/src/src_cpp/cnidariapy.py b/src/src_cpp/cnidariapy.py index c795861a85b0d06f2d479fe01d419e49868c8222..3592309ba42d1118233bdc1e4151086a1fc82076 100644 --- a/src/src_cpp/cnidariapy.py +++ b/src/src_cpp/cnidariapy.py @@ -174,6 +174,10 @@ def fact(*args): return _cnidariapy.fact(*args) fact = _cnidariapy.fact +def version(): + return _cnidariapy.version() +version = _cnidariapy.version + def openoutfile(*args): return _cnidariapy.openoutfile(*args) openoutfile = _cnidariapy.openoutfile diff --git a/src/src_cpp/cnidariapy_wrap.cxx b/src/src_cpp/cnidariapy_wrap.cxx index fcc038ee5a03dc90327a5c2813cb7b97e17fdf4c..d5b7aed53d1113a3de4d2f1cffb6eb46178c8d78 100644 --- a/src/src_cpp/cnidariapy_wrap.cxx +++ b/src/src_cpp/cnidariapy_wrap.cxx @@ -4870,6 +4870,7 @@ SWIGINTERN void std_vector_Sl_std_string_Sg__append(std::vector< std::string > * /* Includes the header in the wrapper code */ #include "shared.hpp" #include "cnidaria.hpp" + using namespace cnidaria; @@ -7921,6 +7922,18 @@ fail: } +SWIGINTERN PyObject *_wrap_version(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { + PyObject *resultobj = 0; + + if (!PyArg_ParseTuple(args,(char *)":version")) SWIG_fail; + version(); + resultobj = SWIG_Py_Void(); + return resultobj; +fail: + return NULL; +} + + SWIGINTERN PyObject *_wrap_openoutfile(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { PyObject *resultobj = 0; std::ofstream *arg1 = 0 ; @@ -9439,6 +9452,7 @@ static PyMethodDef SwigMethods[] = { { (char *)"delete_intPair", _wrap_delete_intPair, METH_VARARGS, NULL}, { (char *)"intPair_swigregister", intPair_swigregister, METH_VARARGS, NULL}, { 
(char *)"fact", _wrap_fact, METH_VARARGS, NULL}, + { (char *)"version", _wrap_version, METH_VARARGS, NULL}, { (char *)"openoutfile", _wrap_openoutfile, METH_VARARGS, NULL}, { (char *)"openinfile", _wrap_openinfile, METH_VARARGS, NULL}, { (char *)"merge_complete", _wrap_merge_complete, METH_VARARGS, NULL}, diff --git a/src/src_lzz/cnidaria.lzz b/src/src_lzz/cnidaria.lzz index f57d818e1085fa91c452d94726a67e7c62b23e90..7918b742c1f24cee49de5a5e30fb6caed25f3437 100644 --- a/src/src_lzz/cnidaria.lzz +++ b/src/src_lzz/cnidaria.lzz @@ -52,6 +52,13 @@ int fact(int n) { } } + +void version() { + std::cout << "cnidaria version: " << __CNIDARIA_VERSION__ << "\n" + << "build date : " << __DATE__ << "\n" + << "build time : " << __TIME__ << std::endl; +} + namespace cnidaria { void openoutfile( std::ofstream &outfile_, string_t filename ) { try { @@ -262,8 +269,9 @@ namespace cnidaria { exit(1); } - hda_g.num_pieces = 1; - hda_g.piece_num = 0; + hda_g.num_pieces = 1; + hda_g.piece_num = 0; + hda_g.complete_registers = num_registers; hda_g.print(); std::cout << " finished merging successfully. saving header" << std::endl; @@ -504,8 +512,9 @@ namespace cnidaria { exit(1); } - hda_g.num_pieces = 1; - hda_g.piece_num = 0; + hda_g.num_pieces = 1; + hda_g.piece_num = 0; + hda_g.complete_registers = num_registers; hda_g.print(); std::cout << " finished merging successfully. 
saving header" << std::endl; @@ -568,6 +577,7 @@ namespace cnidaria { baseint_vec_t num_kmer_total_spp; baseint_vec_t num_kmer_valid_spp; j_matrix_s_vec_t j_matrices; + baseInt sumRegisters = 0; hda.infiles = &infiles; @@ -624,7 +634,7 @@ namespace cnidaria { std::cout << "opening out file " << out_file + EXT_MATRIX << std::endl; openoutfile( oufile_, out_file + EXT_MATRIX ); - cnidaria_header_rw hd_g = cnidaria_header_rw (); + cnidaria_header_rw hd_g = cnidaria_header_rw (); header_data hda_g; string_vec_t infiles_g; @@ -654,7 +664,7 @@ namespace cnidaria { std::cout << " parsing in file " << std::endl; header_data hda; - cnidaria_header_rw hd = cnidaria_header_rw (); + cnidaria_header_rw hd = cnidaria_header_rw (); string_vec_t infiles; string_vec_t srcfiles; @@ -835,16 +845,17 @@ namespace cnidaria { baseInt num_pieces_, baseInt piece_num_ ): - srcfiles( srcfiles_), - out_file( out_file_), - num_threads( num_threads_), - minVal( minVal_), - save_every( save_every_), - export_complete( export_complete_), - export_summary( export_summary_), - export_matrix( export_matrix_), - num_pieces( num_pieces_), - piece_num( piece_num_) { + srcfiles( srcfiles_ ), + out_file( out_file_ ), + num_threads( num_threads_ ), + minVal( minVal_ ), + save_every( save_every_ ), + export_complete( export_complete_ ), + export_summary( export_summary_ ), + export_matrix( export_matrix_ ), + num_pieces( num_pieces_ ), + piece_num( piece_num_ ) + { merger = new merge_jfs( srcfiles_, out_file_ ); locker = new boost::recursive_mutex; } @@ -868,6 +879,11 @@ namespace cnidaria { std::cout << "waiting for threads" << std::endl; tp.wait(); std::cout << "threads finished" << std::endl; + std::cout << "sent all pieces" << std::endl; + for ( baseInt piece_num = 0; piece_num < data.size(); ++piece_num ) { + std::cout << "piece" << piece_num << " = " + << "gCounter" << data[piece_num].merger->get_complete_registers() << std::endl; + } } void send_piece( piece_data data ) { @@ -887,8 +903,7 @@ 
namespace cnidaria { data.merger->run( data.locker ); - std::cout << "saving" << std::endl; - //data.merger->save( data.out_file ); + std::cout << "piece sent gCounter: " << data.merger->get_complete_registers() << std::endl; } void send_data( @@ -907,6 +922,9 @@ namespace cnidaria { send_piece( d ); + std::cout << "saving" << std::endl; + std::cout << "data sent gCounter: " << d.merger->get_complete_registers() << std::endl; + d.merger->save_all( out_file ); } diff --git a/src/src_lzz/cnidaria_methods.lzz b/src/src_lzz/cnidaria_methods.lzz index 81380fff319e0dd4bef04600ba737d9e30d84d3b..eb13141659f8fb02f2bc8946fbac9077888b2f94 100644 --- a/src/src_lzz/cnidaria_methods.lzz +++ b/src/src_lzz/cnidaria_methods.lzz @@ -139,7 +139,9 @@ namespace cnidaria { std::cout << " set to " << max_val_i << " min val " << min_val_i << "\n"; } - + void set_complete_registers( baseInt cr ) { complete_registers = cr; } + baseInt get_complete_registers() { return complete_registers; } + void set_export_summary( bool s ) { export_summary = s; } void set_export_matrix( bool s ) { export_matrix = s; } void set_export_complete( header_data &hda, bool s, string_t filename="" ) { if ( filename=="" ) { filename=basename; }; if ( s ) { enable_complete( hda, filename ); } else { disable_complete(); }; } @@ -871,6 +873,7 @@ namespace cnidaria { updateHeaderData(); } + baseInt get_complete_registers() { return hash_table.get_complete_registers(); } void set_save_every( baseInt pe ) { save_every = pe; } void set_num_pieces( baseInt np ) { num_pieces = np; } void set_piece_num( baseInt pn ) { piece_num = pn; } @@ -878,6 +881,8 @@ namespace cnidaria { void set_max_val( double ma ) { hash_table.set_max_val( ma ); } void set_min_val( baseInt mi ) { hash_table.set_min_val( mi ); } void set_max_val( baseInt ma ) { hash_table.set_max_val( ma ); } + void set_complete_registers( baseInt cr ) { hash_table.set_complete_registers( cr ); } + void append_complete_registers(baseInt cr ) { 
hash_table.set_complete_registers( get_complete_registers() + cr ); } void set_export_summary( bool s ) { hash_table.set_export_summary( s ); export_summary = s; } void set_export_matrix( bool s ) { hash_table.set_export_matrix( s ); export_matrix = s; } void set_export_complete( bool s, string_t filename="" ) { updateHeaderData(); if(filename==""){filename=basename;}; hash_table.set_export_complete( hda, s, filename ); export_complete = s; } @@ -919,6 +924,7 @@ namespace cnidaria { void run( boost::recursive_mutex *g_guard_s=NULL ) { updateHeaderData(); + std::cout << "starting phylogenomics with " << num_threads << " threads" << std::endl; std::cout << "starting " << num_threads << " threads" << std::endl; @@ -950,9 +956,13 @@ namespace cnidaria { ); } - std::cout << "waiting for threads" << std::endl; + std::cout << "waiting for sub threads" << std::endl; tp.wait(); - std::cout << "threads finished" << std::endl; + std::cout << "sub threads finished" + << "gCounter " << gCounter << std::endl; + + //hda.complete_registers = gCounter; + append_complete_registers(gCounter); } void run_process( const baseInt thread_num, boost::recursive_mutex *g_guard_s, boost::recursive_mutex *g_guard_m, boost::recursive_mutex *g_guard_e ) { @@ -968,8 +978,8 @@ namespace cnidaria { thread = 4 * 10 + 5 = 45 thread = 9 * 10 + 10 = 100 */ - baseInt num_threadsL = this->num_pieces * this->num_threads; - baseInt thread_numL = ( this->piece_num * this->num_threads ) + thread_num; + baseInt num_threadsL = this->num_pieces * this->num_threads; + baseInt thread_numL = ( this->piece_num * this->num_threads ) + thread_num; string_t name = (boost::format("merge jfs pc %3d/%3d thr %3d/%3d") % (piece_num+1) % this->num_pieces % (thread_num + 1) % this->num_threads).str(); diff --git a/test/Makefile b/test/Makefile index acfcb066fa57f7b72e52b558c9d7dad11a883f3e..9ee61c3329119ac96a77bd8785e40f4aac8f36c3 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,68 +1,167 @@ -CURR_DIR_N=$(abspath 
$(lastword $(MAKEFILE_LIST))) -CURR_DIR=$(patsubst %/,%,$(dir $(CURR_DIR_N))) -DATAFOLDER=$(CURR_DIR)/data/fungi -FILE_LIST=filelist.csv -DEF_FILE=test_def.csv -OUT_DIR=$(CURR_DIR)/out -KMER_SIZE=21 -NUM_PIECES=1 - -JELLYFISH=$(CURR_DIR)/../src/libs/Jellyfish/bin/jellyfish -HASH_SIZE=1G +#### +# ENVIRONMENT VARIABLES +#### +DATA_BASE ?= data +OUT_BASE ?= out +OUT_FOLDER ?= test +NUM_SAMPLE_ROWS ?= 40000 + +CNIDARIA_NUM_PIECES ?= 1 +CNIDARIA_EXTRA_FILES ?= + +JELLY_KMER_SIZE ?= 21 +JELLY_HASH_SIZE ?= 1G +JELLY_LOWER_COUNT_FA ?= 1 +JELLY_LOWER_COUNT_FQ ?= 2 +JELLY_THREADS ?= 5 +JELLY_COUNTER_LEN ?= 7 +JELLY_OUT_COUNTER_LEN ?= 1 +JELLY_NUM_FILES ?= 300 + + +# CREATE FULL DATABASE (SLOW) +ifdef CNIDARIA_GENDB +DODB=--export-complete +else +DODB= +endif +# SAMPLE THE DATA ifdef SAMPLEDATA -HASH_SIZE=256M +JELLY_HASH_SIZE=256M endif -ifdef NOIMAGE +# DO NOT EXPORT IMAGES +ifdef CNIDARIA_NOIMAGE DOIMAGE="--no-gen-image" else DOIMAGE= endif -COUNTER_LEN=7 -OUT_COUNTER_LEN=1 -#JCMD="${JELLYFISH} count -m ${MER_SIZE} -s ${HASH_SIZE} -t 5 -F 300 --disk --counter-len=${COUNTER_LEN} --out-counter-len=${OUT_COUNTER_LEN} --canonical" -#ulimit -Sn 4096 +#### +# PROGRAMS +#### +COMPRESS=pigz -k1 +DECOMPRESS=pigz -kdc + + + +#### +# WORK VARIABLES +#### +CURR_DIR_N=$(abspath $(lastword $(MAKEFILE_LIST))) +CURR_DIR=$(patsubst %/,%,$(dir $(CURR_DIR_N))) + +DATA_FOLDER=$(CURR_DIR)/$(DATA_BASE) + +FILE_LIST=$(CURR_DIR)/$(OUT_FOLDER)_filelist.csv +DEF_FILE=$(CURR_DIR)/$(OUT_FOLDER)_def.csv + +OUT_DIR=$(CURR_DIR)/$(OUT_BASE)/$(OUT_FOLDER) + +JELLYFISH=$(CURR_DIR)/../src/libs/Jellyfish/bin/jellyfish + OUT_MAKE=$(OUT_DIR)/Makefile -OUT_FILE=$(OUT_DIR)/test/test.json +OUT_FILE=$(OUT_DIR)/$(OUT_FOLDER)/$(OUT_FOLDER).json -INSH=$(wildcard $(DATAFOLDER)/*.sh) +#INFOLDERS=$(wildcard $(DATA_FOLDER)/*) +INFOLDERS=${shell find $(DATA_FOLDER) -mindepth 1 -type d -print} -INXZ=$(DATAFOLDER)/datafa.tar.xz +INJF=$(patsubst %,%/$(JELLY_KMER_SIZE).jf,$(INFOLDERS)) +INCNE=$(wildcard $(DATA_FOLDER)/*.cne) 
-INFA=$(patsubst %.sh,%.fasta,$(INSH)) +SHELL:=/bin/bash -INJF=$(patsubst %.sh,%.fasta.$(KMER_SIZE).jf,$(INSH)) +#### +# RULES +#### all: $(OUT_FILE) clean: - rm $(DATAFOLDER)/*.jf + rm $(DATA_FOLDER)/*/*.jf rm -rf $(OUT_DIR)/* print: - @echo CURR_DIR $(CURR_DIR) - @echo DATAFOLDER $(DATAFOLDER) - @echo FILE_LIST $(FILE_LIST) - @echo DEF_FILE $(DEF_FILE) - @echo OUT_DIR $(OUT_DIR) - @echo KMER_SIZE $(KMER_SIZE) - @echo NUM_PIECES $(NUM_PIECES) - @echo OUT_MAKE $(OUT_MAKE) - @echo OUT_FILE $(OUT_FILE) - @echo INSH $(INSH) + @echo "JELLY_KMER_SIZE $(JELLY_KMER_SIZE)" + @echo "JELLY_HASH_SIZE $(JELLY_HASH_SIZE)" + @echo "JELLY_LOWER_COUNT_FA $(JELLY_LOWER_COUNT_FA)" + @echo "JELLY_LOWER_COUNT_FQ $(JELLY_LOWER_COUNT_FQ)" + @echo "JELLY_THREADS $(JELLY_THREADS)" + @echo "JELLY_COUNTER_LEN $(JELLY_COUNTER_LEN)" + @echo "JELLY_OUT_COUNTER_LEN $(JELLY_OUT_COUNTER_LEN)" + @echo "JELLY_NUM_FILES $(JELLY_NUM_FILES)" + @echo + @echo "CNIDARIA_NUM_PIECES $(CNIDARIA_NUM_PIECES)" + @echo "CNIDARIA_EXTRA_FILES $(CNIDARIA_EXTRA_FILES)" + +ifdef CNIDARIA_NOIMAGE + @echo "CNIDARIA_NOIMAGE true" +else + @echo "CNIDARIA_NOIMAGE false" +endif + + +ifdef CNIDARIA_GENDB + @echo "CNIDARIA_NOIMAGE true" +else + @echo "CNIDARIA_NOIMAGE false" +endif + + +ifdef SAMPLEDATA + @echo + @echo "NUM_SAMPLE_ROWS $(NUM_SAMPLE_ROWS)" +endif + + @echo + @echo "CURR_DIR $(CURR_DIR)" + @echo "DATA_BASE $(DATA_BASE)" + @echo "DATA_FOLDER $(DATA_FOLDER)" + @echo "FILE_LIST $(FILE_LIST)" + @echo "DEF_FILE $(DEF_FILE)" + @echo "OUT_MAKE $(OUT_MAKE)" + @echo "OUT_BASE $(OUT_BASE)" + @echo "OUT_FILE $(OUT_FILE)" + @echo "OUT_DIR $(OUT_DIR)" + @echo "OUT_FOLDER $(OUT_FOLDER)" @echo - @echo INFA $(INFA) + @echo "INFOLDERS $(INFOLDERS)" @echo - @echo INJF $(INJF) + @echo "INJF $(INJF)" + @echo "INCNE $(INCNE)" + + +help: + @echo all, clean, print, help, json, maker, jfs + @echo + @echo + @echo "DATA_BASE ?= data - base path for input" + @echo "OUT_BASE ?= out - base path for output" + @echo "OUT_FOLDER ?= test - 
project name" + @echo "NUM_SAMPLE_ROWS ?= 40000 - number of rows to sample if sampling, must be multiple of 4" + @echo "SAMPLEDATA - sample the data" + @echo + @echo "CNIDARIA_NUM_PIECES ?= 1 - number of pieces for cnidaria to multithread" + @echo "CNIDARIA_NOIMAGE - do not export images" + @echo "CNIDARIA_GENDB - create full database [slow]" + @echo "CNIDARIA_EXTRA_FILES - add extra files to cnidaria" + @echo + @echo "JELLY_KMER_SIZE ?= 21 - kmer size" + @echo "JELLY_HASH_SIZE ?= 1G - hash size" + @echo "JELLY_LOWER_COUNT_FA ?= 1 - lower count for fastA files" + @echo "JELLY_LOWER_COUNT_FQ ?= 2 - lower count for fastQ files" + @echo "JELLY_THREADS ?= 5 - number of threads" + @echo "JELLY_COUNTER_LEN ?= 7 - counter len" + @echo "JELLY_OUT_COUNTER_LEN ?= 1 - out counter len" + @echo "JELLY_NUM_FILES ?= 300 - number of disk files" .PHONY: json makefile fastas maker jfs @@ -73,30 +172,71 @@ maker: $(OUT_MAKE) jfs: $(INJF) -fastas: $(INFA) +#$(DATA_FOLDER)/%.fasta: $(DATA_FOLDER)/%.sh +# test -f $@ || ( cd $(DATA_FOLDER) && bash $? && test -e $@ ) -#$(DATAFOLDER)/%.fasta: $(DATAFOLDER)/%.sh -# test -f $@ || ( cd $(DATAFOLDER) && bash $? && test -e $@ ) +ifdef SAMPLEDATA +$(DATA_FOLDER)/%/$(JELLY_KMER_SIZE).jf: $(DATA_FOLDER)/%/*.fq.gz + $(DECOMPRESS) $? | head -$(NUM_SAMPLE_ROWS) | $(COMPRESS) > $(DATA_FOLDER)/$*/head.fqh.gz + test -f $@ || ( cd $(DATA_FOLDER)/$* && ulimit -Sn 4096 && $(JELLYFISH) count -m $(JELLY_KMER_SIZE) -s $(JELLY_HASH_SIZE) --lower-count=$(JELLY_LOWER_COUNT_FQ) -t $(JELLY_THREADS) -F $(JELLY_NUM_FILES) --disk --counter-len=$(JELLY_COUNTER_LEN) --out-counter-len=$(JELLY_OUT_COUNTER_LEN) --canonical -o $@.tmp --timing=$@.timming <($(DECOMPRESS) head.fqh.gz) && mv $@.tmp $@ ) + +$(DATA_FOLDER)/%/$(JELLY_KMER_SIZE).jf: $(DATA_FOLDER)/%/*.fastq.gz + $(DECOMPRESS) $? 
| head -$(NUM_SAMPLE_ROWS) | $(COMPRESS) > $(DATA_FOLDER)/$*/head.fqh.gz + test -f $@ || ( cd $(DATA_FOLDER)/$* && ulimit -Sn 4096 && $(JELLYFISH) count -m $(JELLY_KMER_SIZE) -s $(JELLY_HASH_SIZE) --lower-count=$(JELLY_LOWER_COUNT_FQ) -t $(JELLY_THREADS) -F $(JELLY_NUM_FILES) --disk --counter-len=$(JELLY_COUNTER_LEN) --out-counter-len=$(JELLY_OUT_COUNTER_LEN) --canonical -o $@.tmp --timing=$@.timming <($(DECOMPRESS) head.fqh.gz) && mv $@.tmp $@ ) + +$(DATA_FOLDER)/%/$(JELLY_KMER_SIZE).jf: $(DATA_FOLDER)/%/*.fasta.gz + $(DECOMPRESS) $? | head -$(NUM_SAMPLE_ROWS) | $(COMPRESS) > $(DATA_FOLDER)/$*/head.fah.gz + test -f $@ || ( cd $(DATA_FOLDER)/$* && ulimit -Sn 4096 && $(JELLYFISH) count -m $(JELLY_KMER_SIZE) -s $(JELLY_HASH_SIZE) --lower-count=$(JELLY_LOWER_COUNT_FA) -t $(JELLY_THREADS) -F $(JELLY_NUM_FILES) --disk --counter-len=$(JELLY_COUNTER_LEN) --out-counter-len=$(JELLY_OUT_COUNTER_LEN) --canonical -o $@.tmp --timing=$@.timming <($(DECOMPRESS) head.fah.gz) && mv $@.tmp $@ ) + +$(DATA_FOLDER)/%/$(JELLY_KMER_SIZE).jf: $(DATA_FOLDER)/%/*.fa.gz + $(DECOMPRESS) $? | head -$(NUM_SAMPLE_ROWS) | $(COMPRESS) > $(DATA_FOLDER)/$*/head.fah.gz + test -f $@ || ( cd $(DATA_FOLDER)/$* && ulimit -Sn 4096 && $(JELLYFISH) count -m $(JELLY_KMER_SIZE) -s $(JELLY_HASH_SIZE) --lower-count=$(JELLY_LOWER_COUNT_FQ) -t $(JELLY_THREADS) -F $(JELLY_NUM_FILES) --disk --counter-len=$(JELLY_COUNTER_LEN) --out-counter-len=$(JELLY_OUT_COUNTER_LEN) --canonical -o $@.tmp --timing=$@.timming <(pigz -kcd head.fah.gz) && mv $@.tmp $@ ) + +$(DATA_FOLDER)/%/$(JELLY_KMER_SIZE).jf: $(DATA_FOLDER)/%/*.fasta + head -$(NUM_SAMPLE_ROWS) $? 
| $(COMPRESS) > $(DATA_FOLDER)/$*/head.fah.gz + test -f $@ || ( cd $(DATA_FOLDER)/$* && ulimit -Sn 4096 && $(JELLYFISH) count -m $(JELLY_KMER_SIZE) -s $(JELLY_HASH_SIZE) --lower-count=$(JELLY_LOWER_COUNT_FA) -t $(JELLY_THREADS) -F $(JELLY_NUM_FILES) --disk --counter-len=$(JELLY_COUNTER_LEN) --out-counter-len=$(JELLY_OUT_COUNTER_LEN) --canonical -o $@.tmp --timing=$@.timming <($(DECOMPRESS) head.fah.gz) && mv $@.tmp $@ ) + +$(DATA_FOLDER)/%/$(JELLY_KMER_SIZE).jf: $(DATA_FOLDER)/%/*.fa + head -$(NUM_SAMPLE_ROWS) $? | $(COMPRESS) > $(DATA_FOLDER)/$*/head.fah.gz + test -f $@ || ( cd $(DATA_FOLDER)/$* && ulimit -Sn 4096 && $(JELLYFISH) count -m $(JELLY_KMER_SIZE) -s $(JELLY_HASH_SIZE) --lower-count=$(JELLY_LOWER_COUNT_FQ) -t $(JELLY_THREADS) -F $(JELLY_NUM_FILES) --disk --counter-len=$(JELLY_COUNTER_LEN) --out-counter-len=$(JELLY_OUT_COUNTER_LEN) --canonical -o $@.tmp --timing=$@.timming <($(DECOMPRESS) head.fah.gz) && mv $@.tmp $@ ) -$(INFA): - cd $(DATAFOLDER) && tar xvf $(INXZ) --keep-newer-files -ifdef SAMPLEDATA -$(DATAFOLDER)/%.fasta.$(KMER_SIZE).jf: $(DATAFOLDER)/%.fasta - head -10000 $? > $?.head.fa - test -f $@ || ( cd $(DATAFOLDER) && ulimit -Sn 4096 && ${JELLYFISH} count -m $(KMER_SIZE) -s $(HASH_SIZE) -t 5 -F 300 --disk --counter-len=$(COUNTER_LEN) --out-counter-len=$(OUT_COUNTER_LEN) --canonical -o $@.tmp --timing=$@.timming $?.head.fa && mv $@.tmp $@ ) else -$(DATAFOLDER)/%.fasta.$(KMER_SIZE).jf: $(DATAFOLDER)/%.fasta - test -f $@ || ( cd $(DATAFOLDER) && ulimit -Sn 4096 && ${JELLYFISH} count -m $(KMER_SIZE) -s $(HASH_SIZE) -t 5 -F 300 --disk --counter-len=$(COUNTER_LEN) --out-counter-len=$(OUT_COUNTER_LEN) --canonical -o $@.tmp --timing=$@.timming $? 
&& mv $@.tmp $@ ) +$(DATA_FOLDER)/%/$(JELLY_KMER_SIZE).jf: $(DATA_FOLDER)/%/*.fq.gz + test -f $@ || ( cd $(DATA_FOLDER)/$* && ulimit -Sn 4096 && $(JELLYFISH) count -m $(JELLY_KMER_SIZE) -s $(JELLY_HASH_SIZE) --lower-count=$(JELLY_LOWER_COUNT_FQ) -t $(JELLY_THREADS) -F $(JELLY_NUM_FILES) --disk --counter-len=$(JELLY_COUNTER_LEN) --out-counter-len=$(JELLY_OUT_COUNTER_LEN) --canonical -o $@.tmp --timing=$@.timming <($(DECOMPRESS) $?) && mv $@.tmp $@ ) + +$(DATA_FOLDER)/%/$(JELLY_KMER_SIZE).jf: $(DATA_FOLDER)/%/*.fastq.gz + test -f $@ || ( cd $(DATA_FOLDER)/$* && ulimit -Sn 4096 && $(JELLYFISH) count -m $(JELLY_KMER_SIZE) -s $(JELLY_HASH_SIZE) --lower-count=$(JELLY_LOWER_COUNT_FQ) -t $(JELLY_THREADS) -F $(JELLY_NUM_FILES) --disk --counter-len=$(JELLY_COUNTER_LEN) --out-counter-len=$(JELLY_OUT_COUNTER_LEN) --canonical -o $@.tmp --timing=$@.timming <($(DECOMPRESS) $?) && mv $@.tmp $@ ) + +$(DATA_FOLDER)/%/$(JELLY_KMER_SIZE).jf: $(DATA_FOLDER)/%/*.fasta.gz + test -f $@ || ( cd $(DATA_FOLDER)/$* && ulimit -Sn 4096 && $(JELLYFISH) count -m $(JELLY_KMER_SIZE) -s $(JELLY_HASH_SIZE) --lower-count=$(JELLY_LOWER_COUNT_FA) -t $(JELLY_THREADS) -F $(JELLY_NUM_FILES) --disk --counter-len=$(JELLY_COUNTER_LEN) --out-counter-len=$(JELLY_OUT_COUNTER_LEN) --canonical -o $@.tmp --timing=$@.timming <($(DECOMPRESS) $?) && mv $@.tmp $@ ) + +$(DATA_FOLDER)/%/$(JELLY_KMER_SIZE).jf: $(DATA_FOLDER)/%/*.fa.gz + test -f $@ || ( cd $(DATA_FOLDER)/$* && ulimit -Sn 4096 && $(JELLYFISH) count -m $(JELLY_KMER_SIZE) -s $(JELLY_HASH_SIZE) --lower-count=$(JELLY_LOWER_COUNT_FA) -t $(JELLY_THREADS) -F $(JELLY_NUM_FILES) --disk --counter-len=$(JELLY_COUNTER_LEN) --out-counter-len=$(JELLY_OUT_COUNTER_LEN) --canonical -o $@.tmp --timing=$@.timming <($(DECOMPRESS) $?) 
&& mv $@.tmp $@ ) + +$(DATA_FOLDER)/%/$(JELLY_KMER_SIZE).jf: $(DATA_FOLDER)/%/*.fasta + test -f $@ || ( cd $(DATA_FOLDER)/$* && ulimit -Sn 4096 && $(JELLYFISH) count -m $(JELLY_KMER_SIZE) -s $(JELLY_HASH_SIZE) --lower-count=$(JELLY_LOWER_COUNT_FA) -t $(JELLY_THREADS) -F $(JELLY_NUM_FILES) --disk --counter-len=$(JELLY_COUNTER_LEN) --out-counter-len=$(JELLY_OUT_COUNTER_LEN) --canonical -o $@.tmp --timing=$@.timming $? && mv $@.tmp $@ ) + +$(DATA_FOLDER)/%/$(JELLY_KMER_SIZE).jf: $(DATA_FOLDER)/%/*.fa + test -f $@ || ( cd $(DATA_FOLDER)/$* && ulimit -Sn 4096 && $(JELLYFISH) count -m $(JELLY_KMER_SIZE) -s $(JELLY_HASH_SIZE) --lower-count=$(JELLY_LOWER_COUNT_FA) -t $(JELLY_THREADS) -F $(JELLY_NUM_FILES) --disk --counter-len=$(JELLY_COUNTER_LEN) --out-counter-len=$(JELLY_OUT_COUNTER_LEN) --canonical -o $@.tmp --timing=$@.timming $? && mv $@.tmp $@ ) + + endif -$(OUT_MAKE): $(FILE_LIST) $(DEF_FILE) - ../scripts/gen_mkfile.py $(FILE_LIST) $(DEF_FILE) $(OUT_DIR) $(KMER_SIZE) $(NUM_PIECES) $(DOIMAGE) + + + +$(OUT_MAKE): $(INJF) $(INCNE) + echo $(INJF) | tr " " "\n" | xargs -n1 -L1 -I{} bash -c 'dn=`dirname {}`; bn=`basename $$dn`; echo -e "{}\t$$bn"' > $(FILE_LIST).tmp && mv $(FILE_LIST).tmp $(FILE_LIST) + echo $(CNIDARIA_EXTRA_FILES) $(INCNE) | tr " " "\n" | xargs -n1 -L1 -I{} bash -c ' bn=`basename {}`; echo -e "{}\t$$bn"' >> $(FILE_LIST) + echo $(INJF) $(CNIDARIA_EXTRA_FILES) $(INCNE) | tr " " "\n" | sed 's/\.cne//' | xargs -n1 -L1 -I{} bash -c 'echo -e "$(OUT_FOLDER)\t{}"' > $(DEF_FILE).tmp && mv $(DEF_FILE).tmp $(DEF_FILE) + $(CURR_DIR)/../scripts/gen_mkfile.py $(FILE_LIST) $(DEF_FILE) $(OUT_DIR) $(JELLY_KMER_SIZE) $(CNIDARIA_NUM_PIECES) $(DOIMAGE) $(DODB) + $(OUT_FILE): $(OUT_MAKE) $(INJF) - $(MAKE) -C $(OUT_DIR)/test + $(MAKE) -C $(OUT_DIR)/$(OUT_FOLDER) diff --git a/test/data/.gitignore b/test/data/.gitignore index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..56fb7b15403698588fc129d474d975c5aaa27621 100644 --- a/test/data/.gitignore +++ b/test/data/.gitignore @@ -0,0 
+1,17 @@ +*.fasta +./Aspergillus_fumigatus_uid14003 +./Aspergillus_nidulans_FGSC_A4_uid13961 +./Aspergillus_niger_CBS_513_88_uid19263 +./Aspergillus_oryzae_RIB40_uid28175 +./Candida_dubliniensis_CD36_uid38659 +./Candida_glabrata_CBS138_uid12376 +./Candida_glabrata +./Cryptococcus_gattii_WM276 +./Cryptococcus_neoformans_var_JEC21_uid10698 +./Kluyveromyces_lactis_NRRL_Y-1140_uid12377 +./Neurospora_crassa_uid132 +./Saccharomyces_cerevisiae_uid128 +./Schizosaccharomyces_pombe_uid127 +./Yarrowia_lipolytica_CLIB122_uid12414 +./Zygosaccharomyces_rouxii_CBS_732_uid39573 + diff --git a/test/data/fungi/.gitignore b/test/data/fungi/.gitignore deleted file mode 100644 index 56fb7b15403698588fc129d474d975c5aaa27621..0000000000000000000000000000000000000000 --- a/test/data/fungi/.gitignore +++ /dev/null @@ -1,17 +0,0 @@ -*.fasta -./Aspergillus_fumigatus_uid14003 -./Aspergillus_nidulans_FGSC_A4_uid13961 -./Aspergillus_niger_CBS_513_88_uid19263 -./Aspergillus_oryzae_RIB40_uid28175 -./Candida_dubliniensis_CD36_uid38659 -./Candida_glabrata_CBS138_uid12376 -./Candida_glabrata -./Cryptococcus_gattii_WM276 -./Cryptococcus_neoformans_var_JEC21_uid10698 -./Kluyveromyces_lactis_NRRL_Y-1140_uid12377 -./Neurospora_crassa_uid132 -./Saccharomyces_cerevisiae_uid128 -./Schizosaccharomyces_pombe_uid127 -./Yarrowia_lipolytica_CLIB122_uid12414 -./Zygosaccharomyces_rouxii_CBS_732_uid39573 - diff --git a/test/data/fungi/Aspergillus_fumigatus_uid14003.sh b/test/data/fungi/Aspergillus_fumigatus_uid14003.sh deleted file mode 100644 index 6003c1ba6ba87a60bfcadef415584b962dad3cd0..0000000000000000000000000000000000000000 --- a/test/data/fungi/Aspergillus_fumigatus_uid14003.sh +++ /dev/null @@ -1,8 +0,0 @@ -set -xeu -SPP=Aspergillus_fumigatus_uid14003 -PREFIX=ftp://ftp.ncbi.nih.gov/genomes/Fungi/Aspergillus_fumigatus_uid14003/ -if [[ -f "$SPP.fasta" ]]; then exit 0; fi -mkdir -p $SPP -cd $SPP -wget --no-clobber --continue --timeout=60 --tries=2 --random-wait ${PREFIX}*.fna -cat *.fna > ../$SPP.fasta diff 
--git a/test/data/fungi/Aspergillus_nidulans_FGSC_A4_uid13961.sh b/test/data/fungi/Aspergillus_nidulans_FGSC_A4_uid13961.sh deleted file mode 100644 index a0416fbffaffc46bc92940314f3c6b3779e363f3..0000000000000000000000000000000000000000 --- a/test/data/fungi/Aspergillus_nidulans_FGSC_A4_uid13961.sh +++ /dev/null @@ -1,8 +0,0 @@ -set -xeu -SPP=Aspergillus_nidulans_FGSC_A4_uid13961 -PREFIX=ftp://ftp.ncbi.nih.gov/genomes/Fungi/Aspergillus_nidulans_FGSC_A4_uid13961/ -if [[ -f "$SPP.fasta" ]]; then exit 0; fi -mkdir -p $SPP -cd $SPP -wget --no-clobber --continue --timeout=60 --tries=2 --random-wait ${PREFIX}*.fna -cat *.fna > ../$SPP.fasta diff --git a/test/data/fungi/Aspergillus_niger_CBS_513_88_uid19263.sh b/test/data/fungi/Aspergillus_niger_CBS_513_88_uid19263.sh deleted file mode 100644 index 0c6fa6d9af21f91e9626ae693f9b1310185abc5b..0000000000000000000000000000000000000000 --- a/test/data/fungi/Aspergillus_niger_CBS_513_88_uid19263.sh +++ /dev/null @@ -1,8 +0,0 @@ -set -xeu -SPP=Aspergillus_niger_CBS_513_88_uid19263 -PREFIX=ftp://ftp.ncbi.nih.gov/genomes/Fungi/Aspergillus_niger_CBS_513_88_uid19263/ -if [[ -f "$SPP.fasta" ]]; then exit 0; fi -mkdir -p $SPP -cd $SPP -wget --no-clobber --continue --timeout=60 --tries=2 --random-wait ${PREFIX}*.fna -cat *.fna > ../$SPP.fasta diff --git a/test/data/fungi/Aspergillus_oryzae_RIB40_uid28175.sh b/test/data/fungi/Aspergillus_oryzae_RIB40_uid28175.sh deleted file mode 100644 index bd0f35d6f0610da95ccaa8f4d5dc7ca233141240..0000000000000000000000000000000000000000 --- a/test/data/fungi/Aspergillus_oryzae_RIB40_uid28175.sh +++ /dev/null @@ -1,8 +0,0 @@ -set -xeu -SPP=Aspergillus_oryzae_RIB40_uid28175 -PREFIX=ftp://ftp.ncbi.nih.gov/genomes/Fungi/Aspergillus_oryzae_RIB40_uid28175/ -if [[ -f "$SPP.fasta" ]]; then exit 0; fi -mkdir -p $SPP -cd $SPP -wget --no-clobber --continue --timeout=60 --tries=2 --random-wait ${PREFIX}*.fna -cat *.fna > ../$SPP.fasta diff --git a/test/data/fungi/Candida_dubliniensis_CD36_uid38659.sh 
b/test/data/fungi/Candida_dubliniensis_CD36_uid38659.sh deleted file mode 100644 index 946bdd2ceee44ba2d2279aef2e7be1153894b32a..0000000000000000000000000000000000000000 --- a/test/data/fungi/Candida_dubliniensis_CD36_uid38659.sh +++ /dev/null @@ -1,8 +0,0 @@ -set -xeu -SPP=Candida_dubliniensis_CD36_uid38659 -PREFIX=ftp://ftp.ncbi.nih.gov/genomes/Fungi/Candida_dubliniensis_CD36_uid38659/ -if [[ -f "$SPP.fasta" ]]; then exit 0; fi -mkdir -p $SPP -cd $SPP -wget --no-clobber --continue --timeout=60 --tries=2 --random-wait ${PREFIX}*.fna -cat *.fna > ../$SPP.fasta diff --git a/test/data/fungi/Candida_glabrata.sh b/test/data/fungi/Candida_glabrata.sh deleted file mode 100644 index 6a84ae96a8da4ae2d07008dc88bdec710efc8121..0000000000000000000000000000000000000000 --- a/test/data/fungi/Candida_glabrata.sh +++ /dev/null @@ -1,8 +0,0 @@ -set -xeu -SPP=Candida_glabrata -PREFIX=ftp://ftp.ncbi.nih.gov/genomes/Fungi/Candida_glabrata/ -if [[ -f "$SPP.fasta" ]]; then exit 0; fi -mkdir -p $SPP -cd $SPP -wget --no-clobber --continue --timeout=60 --tries=2 --random-wait ${PREFIX}*.fna -cat *.fna > ../$SPP.fasta diff --git a/test/data/fungi/Candida_glabrata_CBS138_uid12376.sh b/test/data/fungi/Candida_glabrata_CBS138_uid12376.sh deleted file mode 100644 index 8c6f5a335020e4ac5ced1798a1b242ceb9e7bfb5..0000000000000000000000000000000000000000 --- a/test/data/fungi/Candida_glabrata_CBS138_uid12376.sh +++ /dev/null @@ -1,8 +0,0 @@ -set -xeu -SPP=Candida_glabrata_CBS138_uid12376 -PREFIX=ftp://ftp.ncbi.nih.gov/genomes/Fungi/Candida_glabrata_CBS138_uid12376/ -if [[ -f "$SPP.fasta" ]]; then exit 0; fi -mkdir -p $SPP -cd $SPP -wget --no-clobber --continue --timeout=60 --tries=2 --random-wait ${PREFIX}*.fna -cat *.fna > ../$SPP.fasta diff --git a/test/data/fungi/Cryptococcus_gattii_WM276.sh b/test/data/fungi/Cryptococcus_gattii_WM276.sh deleted file mode 100644 index 65fcc5aaa158b99fd4ed7ff0011321541e3a629d..0000000000000000000000000000000000000000 --- 
a/test/data/fungi/Cryptococcus_gattii_WM276.sh +++ /dev/null @@ -1,8 +0,0 @@ -set -xeu -SPP=Cryptococcus_gattii_WM276 -PREFIX=ftp://ftp.ncbi.nih.gov/genomes/Fungi/Cryptococcus_gattii_WM276/ -if [[ -f "$SPP.fasta" ]]; then exit 0; fi -mkdir -p $SPP -cd $SPP -wget --no-clobber --continue --timeout=60 --tries=2 --random-wait ${PREFIX}*.fna -cat *.fna > ../$SPP.fasta diff --git a/test/data/fungi/Cryptococcus_neoformans_var_JEC21_uid10698.sh b/test/data/fungi/Cryptococcus_neoformans_var_JEC21_uid10698.sh deleted file mode 100644 index b2ff939c9ee009e6116de833990eb8204db475fc..0000000000000000000000000000000000000000 --- a/test/data/fungi/Cryptococcus_neoformans_var_JEC21_uid10698.sh +++ /dev/null @@ -1,8 +0,0 @@ -set -xeu -SPP=Cryptococcus_neoformans_var_JEC21_uid10698 -PREFIX=ftp://ftp.ncbi.nih.gov/genomes/Fungi/Cryptococcus_neoformans_var_JEC21_uid10698/ -if [[ -f "$SPP.fasta" ]]; then exit 0; fi -mkdir -p $SPP -cd $SPP -wget --no-clobber --continue --timeout=60 --tries=2 --random-wait ${PREFIX}*.fna -cat *.fna > ../$SPP.fasta diff --git a/test/data/fungi/Kluyveromyces_lactis_NRRL_Y-1140_uid12377.sh b/test/data/fungi/Kluyveromyces_lactis_NRRL_Y-1140_uid12377.sh deleted file mode 100644 index 96016eb34da28c0fb596e822e0c30a992cc0f60a..0000000000000000000000000000000000000000 --- a/test/data/fungi/Kluyveromyces_lactis_NRRL_Y-1140_uid12377.sh +++ /dev/null @@ -1,8 +0,0 @@ -set -xeu -SPP=Kluyveromyces_lactis_NRRL_Y-1140_uid12377 -PREFIX=ftp://ftp.ncbi.nih.gov/genomes/Fungi/Kluyveromyces_lactis_NRRL_Y-1140_uid12377/ -if [[ -f "$SPP.fasta" ]]; then exit 0; fi -mkdir -p $SPP -cd $SPP -wget --no-clobber --continue --timeout=60 --tries=2 --random-wait ${PREFIX}*.fna -cat *.fna > ../$SPP.fasta diff --git a/test/data/fungi/Neurospora_crassa_uid132.sh b/test/data/fungi/Neurospora_crassa_uid132.sh deleted file mode 100644 index c7864f27dd595a27930fd02c9591026af04f1a95..0000000000000000000000000000000000000000 --- a/test/data/fungi/Neurospora_crassa_uid132.sh +++ /dev/null @@ -1,8 
+0,0 @@ -set -xeu -SPP=Neurospora_crassa_uid132 -PREFIX=ftp://ftp.ncbi.nih.gov/genomes/Fungi/Neurospora_crassa_uid132/ -if [[ -f "$SPP.fasta" ]]; then exit 0; fi -mkdir -p $SPP -cd $SPP -wget --no-clobber --continue --timeout=60 --tries=2 --random-wait ${PREFIX}*.fna -cat *.fna > ../$SPP.fasta diff --git a/test/data/fungi/Saccharomyces_cerevisiae_uid128.sh b/test/data/fungi/Saccharomyces_cerevisiae_uid128.sh deleted file mode 100644 index 72282434f9565b97b5c36e550435a19c2026b81f..0000000000000000000000000000000000000000 --- a/test/data/fungi/Saccharomyces_cerevisiae_uid128.sh +++ /dev/null @@ -1,8 +0,0 @@ -set -xeu -SPP=Saccharomyces_cerevisiae_uid128 -PREFIX=ftp://ftp.ncbi.nih.gov/genomes/Fungi/Saccharomyces_cerevisiae_uid128/ -if [[ -f "$SPP.fasta" ]]; then exit 0; fi -mkdir -p $SPP -cd $SPP -wget --no-clobber --continue --timeout=60 --tries=2 --random-wait ${PREFIX}*.fna -cat *.fna > ../$SPP.fasta diff --git a/test/data/fungi/Schizosaccharomyces_pombe_uid127.sh b/test/data/fungi/Schizosaccharomyces_pombe_uid127.sh deleted file mode 100644 index 2f3b48ecc7c0ab76385877d7c048a5172ad9b92c..0000000000000000000000000000000000000000 --- a/test/data/fungi/Schizosaccharomyces_pombe_uid127.sh +++ /dev/null @@ -1,8 +0,0 @@ -set -xeu -SPP=Schizosaccharomyces_pombe_uid127 -PREFIX=ftp://ftp.ncbi.nih.gov/genomes/Fungi/Schizosaccharomyces_pombe_uid127/ -if [[ -f "$SPP.fasta" ]]; then exit 0; fi -mkdir -p $SPP -cd $SPP -wget --no-clobber --continue --timeout=60 --tries=2 --random-wait ${PREFIX}*.fna -cat *.fna > ../$SPP.fasta diff --git a/test/data/fungi/Yarrowia_lipolytica_CLIB122_uid12414.sh b/test/data/fungi/Yarrowia_lipolytica_CLIB122_uid12414.sh deleted file mode 100644 index f6468206ff792a57871825ecc600e867637d1508..0000000000000000000000000000000000000000 --- a/test/data/fungi/Yarrowia_lipolytica_CLIB122_uid12414.sh +++ /dev/null @@ -1,8 +0,0 @@ -set -xeu -SPP=Yarrowia_lipolytica_CLIB122_uid12414 
-PREFIX=ftp://ftp.ncbi.nih.gov/genomes/Fungi/Yarrowia_lipolytica_CLIB122_uid12414/ -if [[ -f "$SPP.fasta" ]]; then exit 0; fi -mkdir -p $SPP -cd $SPP -wget --no-clobber --continue --timeout=60 --tries=2 --random-wait ${PREFIX}*.fna -cat *.fna > ../$SPP.fasta diff --git a/test/data/fungi/Zygosaccharomyces_rouxii_CBS_732_uid39573.sh b/test/data/fungi/Zygosaccharomyces_rouxii_CBS_732_uid39573.sh deleted file mode 100644 index d90e43b06a3c2906498ab4827b64bd84584165b3..0000000000000000000000000000000000000000 --- a/test/data/fungi/Zygosaccharomyces_rouxii_CBS_732_uid39573.sh +++ /dev/null @@ -1,8 +0,0 @@ -set -xeu -SPP=Zygosaccharomyces_rouxii_CBS_732_uid39573 -PREFIX=ftp://ftp.ncbi.nih.gov/genomes/Fungi/Zygosaccharomyces_rouxii_CBS_732_uid39573/ -if [[ -f "$SPP.fasta" ]]; then exit 0; fi -mkdir -p $SPP -cd $SPP -wget --no-clobber --continue --timeout=60 --tries=2 --random-wait ${PREFIX}*.fna -cat *.fna > ../$SPP.fasta diff --git a/test/data/fungi/datafa.tar.xz b/test/data/fungi/datafa.tar.xz deleted file mode 100644 index 286dc094710474023b3702ee896043ced52e67d4..0000000000000000000000000000000000000000 Binary files a/test/data/fungi/datafa.tar.xz and /dev/null differ diff --git a/test/test_def.csv b/test/test_def.csv deleted file mode 100644 index 1f7f1a1102e1d18ee89c5d53106df951165a9df0..0000000000000000000000000000000000000000 --- a/test/test_def.csv +++ /dev/null @@ -1,18 +0,0 @@ -#RAW_EXTERNAL_FUNGI=data/fungi -#EXT=.%(kmer_size)s.jf - -#FUNGI -test %(RAW_EXTERNAL_FUNGI)s/Aspergillus_fumigatus_uid14003.fasta%(EXT)s -test %(RAW_EXTERNAL_FUNGI)s/Aspergillus_nidulans_FGSC_A4_uid13961.fasta%(EXT)s -test %(RAW_EXTERNAL_FUNGI)s/Aspergillus_niger_CBS_513_88_uid19263.fasta%(EXT)s -test %(RAW_EXTERNAL_FUNGI)s/Aspergillus_oryzae_RIB40_uid28175.fasta%(EXT)s -test %(RAW_EXTERNAL_FUNGI)s/Candida_dubliniensis_CD36_uid38659.fasta%(EXT)s -test %(RAW_EXTERNAL_FUNGI)s/Candida_glabrata_CBS138_uid12376.fasta%(EXT)s -test %(RAW_EXTERNAL_FUNGI)s/Cryptococcus_gattii_WM276.fasta%(EXT)s 
-test %(RAW_EXTERNAL_FUNGI)s/Cryptococcus_neoformans_var_JEC21_uid10698.fasta%(EXT)s -test %(RAW_EXTERNAL_FUNGI)s/Kluyveromyces_lactis_NRRL_Y-1140_uid12377.fasta%(EXT)s -test %(RAW_EXTERNAL_FUNGI)s/Neurospora_crassa_uid132.fasta%(EXT)s -test %(RAW_EXTERNAL_FUNGI)s/Saccharomyces_cerevisiae_uid128.fasta%(EXT)s -test %(RAW_EXTERNAL_FUNGI)s/Schizosaccharomyces_pombe_uid127.fasta%(EXT)s -test %(RAW_EXTERNAL_FUNGI)s/Yarrowia_lipolytica_CLIB122_uid12414.fasta%(EXT)s -test %(RAW_EXTERNAL_FUNGI)s/Zygosaccharomyces_rouxii_CBS_732_uid39573.fasta%(EXT)s