Commit 01400151 authored by sauloal's avatar sauloal
Browse files

first

parents
*.o
*.tar.gz
bed/
bedToBigBed2/
inc/
jksrc.zip
kent/
The MIT License (MIT)
Copyright (c) 2016 Saulo Aflitos
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
BedTools (http://genome.ucsc.edu/admin/git.html - http://hgdownload.cse.ucsc.edu/admin/jksrc.zip) to javascript using Emscripten (LLVM) in docker (https://github.com/kripken/emscripten)
/* bedGraphToBigWig - Convert a bedGraph program to bigWig.. */
/* Copyright (C) 2013 The Regents of the University of California
* See README in this or parent directory for licensing information. */
#include "common.h"
#include "obscure.h"
#include "memalloc.h"
#include "linefile.h"
#include "localmem.h"
#include "hash.h"
#include "options.h"
#include "sqlNum.h"
#include "dystring.h"
#include "cirTree.h"
#include "sig.h"
#include "zlibFace.h"
#include "bPlusTree.h"
#include "bbiFile.h"
#include "bwgInternal.h"
#include "bigWig.h"
static int blockSize = 256;
static int itemsPerSlot = 1024;
static boolean doCompress = FALSE;
static int maxGigs = 100; // Maximum number of gigs to allocate in one block.
// Undocumented on purpose.
void usage()
/* Explain usage and exit. */
{
errAbort(
"bedGraphToBigWig v %d - Convert a bedGraph file to bigWig format.\n"
"usage:\n"
" bedGraphToBigWig in.bedGraph chrom.sizes out.bw\n"
"where in.bedGraph is a four column file in the format:\n"
" <chrom> <start> <end> <value>\n"
"and chrom.sizes is a two-column file/URL: <chromosome name> <size in bases>\n"
"and out.bw is the output indexed big wig file.\n"
"If the assembly <db> is hosted by UCSC, chrom.sizes can be a URL like\n"
" http://hgdownload.cse.ucsc.edu/goldenPath/<db>/bigZips/<db>.chrom.sizes\n"
"or you may use the script fetchChromSizes to download the chrom.sizes file.\n"
"If not hosted by UCSC, a chrom.sizes file can be generated by running\n"
"twoBitInfo on the assembly .2bit file.\n"
"The input bedGraph file must be sorted, use the unix sort command:\n"
" sort -k1,1 -k2,2n unsorted.bedGraph > sorted.bedGraph\n"
"options:\n"
" -blockSize=N - Number of items to bundle in r-tree. Default %d\n"
" -itemsPerSlot=N - Number of data points bundled at lowest level. Default %d\n"
" -unc - If set, do not use compression."
, bbiCurrentVersion, blockSize, itemsPerSlot
);
}
static struct optionSpec options[] = {
{"blockSize", OPTION_INT},
{"itemsPerSlot", OPTION_INT},
{"unc", OPTION_BOOLEAN},
{"maxGigs", OPTION_INT},
{NULL, 0},
};
struct sectionItem
/* An item in a section of a bedGraph. */
{
bits32 start, end; /* Position in chromosome, half open. */
float val; /* Single precision value. */
};
void writeSections(struct bbiChromUsage *usageList, struct lineFile *lf,
int itemsPerSlot, struct bbiBoundsArray *bounds, int sectionCount, FILE *f,
int resTryCount, int resScales[], int resSizes[],
boolean doCompress, bits32 *retMaxSectionSize)
/* Read through lf, chunking it into sections that get written to f. Save info
* about sections in bounds. */
{
int maxSectionSize = 0;
struct bbiChromUsage *usage = usageList;
int itemIx = 0, sectionIx = 0;
bits32 reserved32 = 0;
UBYTE reserved8 = 0;
struct sectionItem items[itemsPerSlot];
struct sectionItem *lastB = NULL;
bits32 resEnds[resTryCount];
int resTry;
for (resTry = 0; resTry < resTryCount; ++resTry)
resEnds[resTry] = 0;
struct dyString *stream = dyStringNew(0);
/* remove initial browser and track lines */
lineFileRemoveInitialCustomTrackLines(lf);
for (;;)
{
/* Get next line of input if any. */
char *row[5];
int rowSize = lineFileChopNext(lf, row, ArraySize(row));
/* Figure out whether need to output section. */
boolean sameChrom = FALSE;
if (rowSize > 0)
sameChrom = sameString(row[0], usage->name);
if (itemIx >= itemsPerSlot || rowSize == 0 || !sameChrom)
{
/* Figure out section position. */
bits32 chromId = usage->id;
bits32 sectionStart = items[0].start;
bits32 sectionEnd = items[itemIx-1].end;
/* Save section info for indexing. */
assert(sectionIx < sectionCount);
struct bbiBoundsArray *section = &bounds[sectionIx++];
section->offset = ftell(f);
section->range.chromIx = chromId;
section->range.start = sectionStart;
section->range.end = sectionEnd;
/* Output section header to stream. */
dyStringClear(stream);
UBYTE type = bwgTypeBedGraph;
bits16 itemCount = itemIx;
dyStringWriteOne(stream, chromId); // chromId
dyStringWriteOne(stream, sectionStart); // start
dyStringWriteOne(stream, sectionEnd); // end
dyStringWriteOne(stream, reserved32); // itemStep
dyStringWriteOne(stream, reserved32); // itemSpan
dyStringWriteOne(stream, type); // type
dyStringWriteOne(stream, reserved8); // reserved
dyStringWriteOne(stream, itemCount); // itemCount
/* Output each item in section to stream. */
int i;
for (i=0; i<itemIx; ++i)
{
struct sectionItem *item = &items[i];
dyStringWriteOne(stream, item->start);
dyStringWriteOne(stream, item->end);
dyStringWriteOne(stream, item->val);
}
/* Save stream to file, compressing if need be. */
if (stream->stringSize > maxSectionSize)
maxSectionSize = stream->stringSize;
if (doCompress)
{
size_t maxCompSize = zCompBufSize(stream->stringSize);
char compBuf[maxCompSize];
int compSize = zCompress(stream->string, stream->stringSize, compBuf, maxCompSize);
mustWrite(f, compBuf, compSize);
}
else
mustWrite(f, stream->string, stream->stringSize);
/* If at end of input we are done. */
if (rowSize == 0)
break;
/* Set up for next section. */
itemIx = 0;
if (!sameChrom)
{
usage = usage->next;
assert(usage != NULL);
if (!sameString(row[0], usage->name))
errAbort("read %s, expecting %s on line %d in file %s\n",
row[0], usage->name, lf->lineIx, lf->fileName);
assert(sameString(row[0], usage->name));
lastB = NULL;
for (resTry = 0; resTry < resTryCount; ++resTry)
resEnds[resTry] = 0;
}
}
/* Parse out input. */
lineFileExpectWords(lf, 4, rowSize);
bits32 start = lineFileNeedNum(lf, row, 1);
bits32 end = lineFileNeedNum(lf, row, 2);
float val = lineFileNeedDouble(lf, row, 3);
/* Verify that inputs meets our assumption - that it is a sorted bedGraph file. */
if (start > end)
errAbort("Start (%u) after end (%u) line %d of %s", start, end, lf->lineIx, lf->fileName);
if (lastB != NULL)
{
if (lastB->start > start)
errAbort("BedGraph not sorted on start line %d of %s", lf->lineIx, lf->fileName);
if (lastB->end > start)
errAbort("Overlapping regions in bedGraph line %d of %s", lf->lineIx, lf->fileName);
}
/* Do zoom counting. */
for (resTry = 0; resTry < resTryCount; ++resTry)
{
bits32 resEnd = resEnds[resTry];
if (start >= resEnd)
{
resSizes[resTry] += 1;
resEnds[resTry] = resEnd = start + resScales[resTry];
}
while (end > resEnd)
{
resSizes[resTry] += 1;
resEnds[resTry] = resEnd = resEnd + resScales[resTry];
}
}
/* Save values in output array. */
struct sectionItem *b = &items[itemIx];
b->start = start;
b->end = end;
b->val = val;
lastB = b;
itemIx += 1;
}
assert(sectionIx == sectionCount);
*retMaxSectionSize = maxSectionSize;
}
static struct bbiSummary *bedGraphWriteReducedOnceReturnReducedTwice(struct bbiChromUsage *usageList,
int fieldCount, struct lineFile *lf, bits32 initialReduction, bits32 initialReductionCount,
int zoomIncrement, int blockSize, int itemsPerSlot, boolean doCompress,
struct lm *lm, FILE *f, bits64 *retDataStart, bits64 *retIndexStart,
struct bbiSummaryElement *totalSum)
/* Write out data reduced by factor of initialReduction. Also calculate and keep in memory
* next reduction level. This is more work than some ways, but it keeps us from having to
* keep the first reduction entirely in memory. */
{
struct bbiSummary *twiceReducedList = NULL;
bits32 doubleReductionSize = initialReduction * zoomIncrement;
struct bbiChromUsage *usage = usageList;
struct bbiSummary oneSummary, *sum = NULL;
struct bbiBoundsArray *boundsArray, *boundsPt, *boundsEnd;
boundsPt = AllocArray(boundsArray, initialReductionCount);
boundsEnd = boundsPt + initialReductionCount;
*retDataStart = ftell(f);
writeOne(f, initialReductionCount);
boolean firstRow = TRUE;
struct bbiSumOutStream *stream = bbiSumOutStreamOpen(itemsPerSlot, f, doCompress);
/* remove initial browser and track lines */
lineFileRemoveInitialCustomTrackLines(lf);
for (;;)
{
/* Get next line of input if any. */
char *row[5];
int rowSize = lineFileChopNext(lf, row, ArraySize(row));
/* Output last section and break if at end of file. */
if (rowSize == 0 && sum != NULL)
{
bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize,
&boundsPt, boundsEnd, lm, stream);
break;
}
/* Parse out row. */
char *chrom = row[0];
bits32 start = sqlUnsigned(row[1]);
bits32 end = sqlUnsigned(row[2]);
float val = sqlFloat(row[3]);
/* Update total summary stuff. */
bits32 size = end-start;
if (firstRow)
{
totalSum->validCount = size;
totalSum->minVal = totalSum->maxVal = val;
totalSum->sumData = val*size;
totalSum->sumSquares = val*val*size;
firstRow = FALSE;
}
else
{
totalSum->validCount += size;
if (val < totalSum->minVal) totalSum->minVal = val;
if (val > totalSum->maxVal) totalSum->maxVal = val;
totalSum->sumData += val*size;
totalSum->sumSquares += val*val*size;
}
/* If new chromosome output existing block. */
if (differentString(chrom, usage->name))
{
usage = usage->next;
bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize,
&boundsPt, boundsEnd, lm, stream);
sum = NULL;
}
/* If start past existing block then output it. */
else if (sum != NULL && sum->end <= start)
{
bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize,
&boundsPt, boundsEnd, lm, stream);
sum = NULL;
}
/* If don't have a summary we're working on now, make one. */
if (sum == NULL)
{
oneSummary.chromId = usage->id;
oneSummary.start = start;
oneSummary.end = start + initialReduction;
if (oneSummary.end > usage->size) oneSummary.end = usage->size;
oneSummary.minVal = oneSummary.maxVal = val;
oneSummary.sumData = oneSummary.sumSquares = 0.0;
oneSummary.validCount = 0;
sum = &oneSummary;
}
/* Deal with case where might have to split an item between multiple summaries. This
* loop handles all but the final affected summary in that case. */
while (end > sum->end)
{
verbose(3, "Splitting start %d, end %d, sum->start %d, sum->end %d\n", start, end, sum->start, sum->end);
/* Fold in bits that overlap with existing summary and output. */
bits32 overlap = rangeIntersection(start, end, sum->start, sum->end);
sum->validCount += overlap;
if (sum->minVal > val) sum->minVal = val;
if (sum->maxVal < val) sum->maxVal = val;
sum->sumData += val * overlap;
sum->sumSquares += val*val * overlap;
bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize,
&boundsPt, boundsEnd, lm, stream);
size -= overlap;
/* Move summary to next part. */
sum->start = start = sum->end;
sum->end = start + initialReduction;
if (sum->end > usage->size) sum->end = usage->size;
sum->minVal = sum->maxVal = val;
sum->sumData = sum->sumSquares = 0.0;
sum->validCount = 0;
}
/* Add to summary. */
sum->validCount += size;
if (sum->minVal > val) sum->minVal = val;
if (sum->maxVal < val) sum->maxVal = val;
sum->sumData += val * size;
sum->sumSquares += val*val * size;
}
bbiSumOutStreamClose(&stream);
/* Write out 1st zoom index. */
int indexOffset = *retIndexStart = ftell(f);
assert(boundsPt == boundsEnd);
cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), initialReductionCount,
blockSize, itemsPerSlot, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset,
indexOffset, f);
freez(&boundsArray);
slReverse(&twiceReducedList);
return twiceReducedList;
}
void bedGraphToBigWig(char *inName, char *chromSizes, char *outName)
/* bedGraphToBigWig - Convert a bedGraph program to bigWig.. */
{
verboseTimeInit();
struct lineFile *lf = lineFileOpen(inName, TRUE);
struct hash *chromSizesHash = bbiChromSizesFromFile(chromSizes);
verbose(2, "%d chroms in %s\n", chromSizesHash->elCount, chromSizes);
int minDiff = 0, i;
double aveSize = 0;
bits64 bedCount = 0;
bits32 uncompressBufSize = 0;
struct bbiChromUsage *usageList = bbiChromUsageFromBedFile(lf, chromSizesHash, NULL,
&minDiff, &aveSize, &bedCount, FALSE);
verboseTime(2, "pass1");
verbose(2, "%d chroms in %s, minDiff=%d, aveSize=%g, bedCount=%lld\n",
slCount(usageList), inName, minDiff, aveSize, bedCount);
/* Write out dummy header, zoom offsets. */
FILE *f = mustOpen(outName, "wb");
bbiWriteDummyHeader(f);
bbiWriteDummyZooms(f);
/* Write out dummy total summary. */
struct bbiSummaryElement totalSum;
ZeroVar(&totalSum);
bits64 totalSummaryOffset = ftell(f);
bbiSummaryElementWrite(f, &totalSum);
/* Write out chromosome/size database. */
bits64 chromTreeOffset = ftell(f);
bbiWriteChromInfo(usageList, blockSize, f);
/* Set up to keep track of possible initial reduction levels. */
int resScales[bbiMaxZoomLevels], resSizes[bbiMaxZoomLevels];
int resTryCount = bbiCalcResScalesAndSizes(aveSize, resScales, resSizes);
/* Write out primary full resolution data in sections, collect stats to use for reductions. */
bits64 dataOffset = ftell(f);
bits64 sectionCount = bbiCountSectionsNeeded(usageList, itemsPerSlot);
writeOne(f, sectionCount);
struct bbiBoundsArray *boundsArray;
AllocArray(boundsArray, sectionCount);
lineFileRewind(lf);
bits32 maxSectionSize = 0;
writeSections(usageList, lf, itemsPerSlot, boundsArray, sectionCount, f,
resTryCount, resScales, resSizes, doCompress, &maxSectionSize);
verboseTime(2, "pass2");
/* Write out primary data index. */
bits64 indexOffset = ftell(f);
cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), sectionCount,
blockSize, 1, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset,
indexOffset, f);
verboseTime(2, "index write");
/* Declare arrays and vars that track the zoom levels we actually output. */
bits32 zoomAmounts[bbiMaxZoomLevels];
bits64 zoomDataOffsets[bbiMaxZoomLevels];
bits64 zoomIndexOffsets[bbiMaxZoomLevels];
/* Call monster zoom maker library function that bedToBigBed also uses. */
int zoomLevels = bbiWriteZoomLevels(lf, f, blockSize, itemsPerSlot,
bedGraphWriteReducedOnceReturnReducedTwice, 4,
doCompress, indexOffset - dataOffset,
usageList, resTryCount, resScales, resSizes,
zoomAmounts, zoomDataOffsets, zoomIndexOffsets, &totalSum);
/* Figure out buffer size needed for uncompression if need be. */
if (doCompress)
{
int maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk);
uncompressBufSize = max(maxSectionSize, maxZoomUncompSize);
}
/* Go back and rewrite header. */
rewind(f);
bits32 sig = bigWigSig;
bits16 version = bbiCurrentVersion;
bits16 summaryCount = zoomLevels;
bits16 reserved16 = 0;
bits32 reserved32 = 0;
bits64 reserved64 = 0;
/* Write fixed header */
writeOne(f, sig);
writeOne(f, version);
writeOne(f, summaryCount);
writeOne(f, chromTreeOffset);
writeOne(f, dataOffset);
writeOne(f, indexOffset);
writeOne(f, reserved16); // fieldCount
writeOne(f, reserved16); // definedFieldCount
writeOne(f, reserved64); // autoSqlOffset
writeOne(f, totalSummaryOffset);
writeOne(f, uncompressBufSize);
writeOne(f, reserved64); // nameIndexOffset
assert(ftell(f) == 64);
/* Write summary headers with data. */
verbose(2, "Writing %d levels of zoom\n", zoomLevels);
for (i=0; i<zoomLevels; ++i)
{
verbose(3, "zoomAmounts[%d] = %d\n", i, (int)zoomAmounts[i]);
writeOne(f, zoomAmounts[i]);
writeOne(f, reserved32);
writeOne(f, zoomDataOffsets[i]);
writeOne(f, zoomIndexOffsets[i]);
}
/* Write rest of summary headers with no data. */
for (i=zoomLevels; i<bbiMaxZoomLevels; ++i)
{
writeOne(f, reserved32);
writeOne(f, reserved32);
writeOne(f, reserved64);
writeOne(f, reserved64);
}
/* Write total summary. */
fseek(f, totalSummaryOffset, SEEK_SET);
bbiSummaryElementWrite(f, &totalSum);
/* Write end signature. */
fseek(f, 0L, SEEK_END);
writeOne(f, sig);
lineFileClose(&lf);
carefulClose(&f);
}
int main(int argc, char *argv[])
/* Process command line. */
{
optionInit(&argc, argv, options);
maxGigs = optionInt("maxGigs", maxGigs);
setMaxAlloc(maxGigs*1000000000L);
blockSize = optionInt("blockSize", blockSize);
itemsPerSlot = optionInt("itemsPerSlot", itemsPerSlot);
doCompress = !optionExists("unc");
if (argc != 4)
usage();
bedGraphToBigWig(argv[1], argv[2], argv[3]);
if (verboseLevel() > 1)
printVmPeak();
return 0;
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/* e_os2.h */
/* ====================================================================
* Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com).