diff --git a/README.md b/README.md index 87cf666ee6..4310b5498e 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,6 @@ Broadly speaking the majority of the code is covered under the MIT license with * Much of the CRAM code is under the Apache License, Version 2 * Core `tribble` code (underlying VCF reading/writing amongst other things) is under LGPL -* Code supporting the reading/writing of SRA format is uncopyrighted & public domain ### Java Minimum Version Support Policy diff --git a/build.gradle b/build.gradle index 799bc0d50c..5bdaa0561e 100644 --- a/build.gradle +++ b/build.gradle @@ -51,7 +51,6 @@ dependencies { implementation "org.json:json:20231013" implementation 'org.openjdk.nashorn:nashorn-core:15.4' - api "gov.nih.nlm.ncbi:ngs-java:2.9.0" api "org.apache.commons:commons-jexl:2.1.1" testImplementation 'org.testng:testng:7.8.0' @@ -225,13 +224,13 @@ tasks.register('testWithOptimisticVCF4_4', Test) { } test { - description = "Runs the unit tests other than the SRA tests" + description = "Runs the unit tests other than external-API tests" useTestNG { if (OperatingSystem.current().isUnix()) { - excludeGroups "slow", "broken", "defaultReference", "optimistic_vcf_4_4", "ftp", "http", "sra", "ena", "htsget" + excludeGroups "slow", "broken", "defaultReference", "optimistic_vcf_4_4", "ftp", "http", "ena", "htsget" } else { - excludeGroups "slow", "broken", "defaultReference", "optimistic_vcf_4_4", "ftp", "http", "sra", "ena", "htsget", "unix" + excludeGroups "slow", "broken", "defaultReference", "optimistic_vcf_4_4", "ftp", "http", "ena", "htsget", "unix" } parallel = "classes" threadCount = Runtime.runtime.availableProcessors() @@ -250,13 +249,12 @@ tasks.register('testFTP', Test) { } tasks.register('testExternalApis', Test) { - description = "Run the SRA, ENA, and HTTP tests (tests that interact with external APIs)" + description = "Run the ENA and HTTP tests (tests that interact with external APIs)" testClassesDirs = sourceSets.test.output.classesDirs classpath = sourceSets.test.runtimeClasspath - jvmArgs += '-Dsamjdk.sra_libraries_download=true' useTestNG { - includeGroups "sra", "http", "ena" + includeGroups "http", "ena" excludeGroups "slow", "broken" } } diff --git a/src/main/java/htsjdk/beta/plugin/reads/ReadsBundle.java b/src/main/java/htsjdk/beta/plugin/reads/ReadsBundle.java index 7e18940f2d..4941ddb5af 100644 --- a/src/main/java/htsjdk/beta/plugin/reads/ReadsBundle.java +++ b/src/main/java/htsjdk/beta/plugin/reads/ReadsBundle.java @@ -201,7 +201,7 @@ private static Optional> getInferredCon } else if (ext.equals((FileExtensions.SAM))) { return Optional.of(new Tuple<>(BundleResourceType.CT_ALIGNED_READS, BundleResourceType.FMT_READS_SAM)); } - // TODO: finish this, else SRA, htsget,... + // TODO: finish this, else htsget,... } return Optional.empty(); } diff --git a/src/main/java/htsjdk/samtools/Defaults.java b/src/main/java/htsjdk/samtools/Defaults.java index e26509d60f..948be53815 100644 --- a/src/main/java/htsjdk/samtools/Defaults.java +++ b/src/main/java/htsjdk/samtools/Defaults.java @@ -95,12 +95,6 @@ public class Defaults { */ public static final String EBI_REFERENCE_SERVICE_URL_MASK; - /** - * Boolean describing whether downloading of SRA native libraries is allowed, - * in case such native libraries are not found locally. Default = false. - */ - public static final boolean SRA_LIBRARIES_DOWNLOAD; - /** * Whether to attempt to use jlibdeflate (libdeflate via JNI) for DEFLATE compression and decompression. * When true, the default deflater/inflater factories will try to load the native library and fall back @@ -143,7 +137,6 @@ public class Defaults { CUSTOM_READER_FACTORY = getStringProperty("custom_reader", ""); SAM_FLAG_FIELD_FORMAT = SamFlagField.valueOf(getStringProperty("sam_flag_field_format", SamFlagField.DECIMAL.name())); - SRA_LIBRARIES_DOWNLOAD = getBooleanProperty("sra_libraries_download", false); USE_LIBDEFLATE = getBooleanProperty("use_libdeflate", true); DISABLE_SNAPPY_COMPRESSOR = getBooleanProperty(DISABLE_SNAPPY_PROPERTY_NAME, false); OPTIMISTIC_VCF_4_4 = getBooleanProperty(OPTIMISTIC_VCF_4_4_PROPERTY, false); diff --git a/src/main/java/htsjdk/samtools/SRAFileReader.java b/src/main/java/htsjdk/samtools/SRAFileReader.java deleted file mode 100644 index 9bca117308..0000000000 --- a/src/main/java/htsjdk/samtools/SRAFileReader.java +++ /dev/null @@ -1,301 +0,0 @@ -/*=========================================================================== - * - * PUBLIC DOMAIN NOTICE - * National Center for Biotechnology Information - * - * This software/database is a "United States Government Work" under the - * terms of the United States Copyright Act. It was written as part of - * the author's official duties as a United States Government employee and - * thus cannot be copyrighted. This software/database is freely available - * to the public for use. The National Library of Medicine and the U.S. - * Government have not placed any restriction on its use or reproduction. - * - * Although all reasonable efforts have been taken to ensure the accuracy - * and reliability of the software and data, the NLM and the U.S. - * Government do not and cannot warrant the performance or results that - * may be obtained by using this software or data. The NLM and the U.S. - * Government disclaim all warranties, express or implied, including - * warranties of performance, merchantability or fitness for any particular - * purpose. - * - * Please cite the author in any work or product based on this material. - * - * =========================================================================== - * - */ - -/** - * Created by andrii.nikitiuk on 8/11/15. - */ -package htsjdk.samtools; - -import htsjdk.samtools.SamReader.Type; -import htsjdk.samtools.sra.ReferenceCache; -import htsjdk.samtools.sra.SRAAccession; -import htsjdk.samtools.util.CloseableIterator; -import htsjdk.samtools.util.Log; -import java.util.List; -import ngs.ErrorMsg; -import ngs.ReadCollection; -import ngs.ReadGroupIterator; -import ngs.ReferenceIterator; - -public class SRAFileReader extends SamReader.ReaderImplementation implements SamReader.Indexing { - private static final Log log = Log.getInstance(SRAFileReader.class); - private SRAAccession acc; - private SAMFileHeader virtualHeader; - private ReadCollection run; - private ValidationStringency validationStringency; - private SRAIterator.RecordRangeInfo recordRangeInfo; - private SRAIndex index; - private ReferenceCache cachedReferences; - - public SRAFileReader(final SRAAccession acc) { - this.acc = acc; - - if (!acc.isValid()) { - throw new IllegalArgumentException("SRAFileReader: cannot resolve SRA accession '" + acc + "'\n" - + "Possible causes are an invalid SRA accession or a connection problem."); - } - - try { - run = gov.nih.nlm.ncbi.ngs.NGS.openReadCollection(acc.toString()); - virtualHeader = loadSamHeader(); - } catch (final Exception e) { - throw new RuntimeException(e); - } - - cachedReferences = new ReferenceCache(run, virtualHeader); - recordRangeInfo = SRAIterator.getRecordsRangeInfo(run); - index = new SRAIndex(virtualHeader, recordRangeInfo); - } - - @Override - public Type type() { - return Type.SRA_TYPE; - } - - @Override - public boolean hasIndex() { - return true; - } - - @Override - public BAMIndex getIndex() { - return index; - } - - @Override - public SAMFileHeader getFileHeader() { - return virtualHeader; - } - - @Override - public CloseableIterator getIterator() { - return getIterator(getFilePointerSpanningReads()); - } - - @Override - public CloseableIterator getIterator(SAMFileSpan chunks) { - if (run == null) { - throw new RuntimeException("Cannot create iterator - SRA run is uninitialized"); - } - - if (virtualHeader == null) { - throw new RuntimeException("Cannot create iterator - SAM file header is uninitialized"); - } - - List chunkList = ((BAMFileSpan) chunks).getChunks(); - - final SRAIterator newIterator = - new SRAIterator(acc, run, virtualHeader, cachedReferences, recordRangeInfo, chunkList); - if (validationStringency != null) { - newIterator.setValidationStringency(validationStringency); - } - - return newIterator; - } - - @Override - public SAMFileSpan getFilePointerSpanningReads() { - if (recordRangeInfo.getTotalRecordRangeLength() <= 0) { - throw new RuntimeException("Cannot create file span - SRA file is empty"); - } - - return new BAMFileSpan(new Chunk(0, recordRangeInfo.getTotalRecordRangeLength())); - } - - @Override - public CloseableIterator query(QueryInterval[] intervals, boolean contained) { - BAMFileSpan span = new BAMFileSpan(); - BrowseableBAMIndex index = getBrowseableIndex(); - - for (QueryInterval interval : intervals) { - BAMFileSpan intervalSpan; - if (!contained) { - intervalSpan = index.getSpanOverlapping(interval.referenceIndex, interval.start, interval.end); - - } else { - intervalSpan = getSpanContained(interval.referenceIndex, interval.start, interval.end); - } - span.add(intervalSpan); - } - - return getIterator(span); - } - - @Override - public CloseableIterator queryAlignmentStart(String sequence, int start) { - int sequenceIndex = virtualHeader.getSequenceIndex(sequence); - if (sequenceIndex == -1) { - throw new IllegalArgumentException("Unknown sequence '" + sequence + "' was passed to SRAFileReader"); - } - - return getIterator(getSpanContained(sequenceIndex, start, -1)); - } - - @Override - public CloseableIterator queryUnmapped() { - if (recordRangeInfo.getTotalRecordRangeLength() <= 0) { - throw new RuntimeException("Cannot create file span - SRA file is empty"); - } - - SAMFileSpan span = new BAMFileSpan( - new Chunk(recordRangeInfo.getTotalReferencesLength(), recordRangeInfo.getTotalRecordRangeLength())); - return getIterator(span); - } - - @Override - public void close() { - run = null; - } - - @Override - public ValidationStringency getValidationStringency() { - return validationStringency; - } - - /** INDEXING */ - - /** - * Returns true if the supported index is browseable, meaning the bins in it can be traversed - * and chunk data inspected and retrieved. - * - * @return True if the index supports the BrowseableBAMIndex interface. False otherwise. - */ - @Override - public boolean hasBrowseableIndex() { - return true; - } - - /** - * Gets an index tagged with the BrowseableBAMIndex interface. Throws an exception if no such - * index is available. - * - * @return An index with a browseable interface, if possible. - * @throws SAMException if no such index is available. - */ - @Override - public BrowseableBAMIndex getBrowseableIndex() { - return index; - } - - /** - * Iterate through the given chunks in the file. - * - * @param chunks List of chunks for which to retrieve data. - * @return An iterator over the given chunks. - */ - @Override - public SAMRecordIterator iterator(final SAMFileSpan chunks) { - CloseableIterator it = getIterator(chunks); - if (it == null) { - return null; - } - return (SAMRecordIterator) it; - } - - /** ReaderImplementation */ - @Override - void enableFileSource(final SamReader reader, final boolean enabled) { - log.info("enableFileSource is not supported"); - } - - @Override - void enableIndexCaching(final boolean enabled) { - log.info("enableIndexCaching is not supported"); - } - - @Override - void enableIndexMemoryMapping(final boolean enabled) { - log.info("enableIndexMemoryMapping is not supported"); - } - - @Override - void enableCrcChecking(final boolean enabled) { - log.info("enableCrcChecking is not supported"); - } - - @Override - void setSAMRecordFactory(final SAMRecordFactory factory) { - log.info("setSAMRecordFactory is not supported"); - } - - @Override - void setValidationStringency(final ValidationStringency validationStringency) { - this.validationStringency = validationStringency; - } - - protected SRAIterator.RecordRangeInfo getRecordsRangeInfo() { - return recordRangeInfo; - } - - private SAMFileHeader loadSamHeader() throws ErrorMsg { - if (run == null) { - throw new RuntimeException("Cannot load SAMFileHeader - SRA run is uninitialized"); - } - - String runName = run.getName(); - - SAMFileHeader header = new SAMFileHeader(); - header.setSortOrder(SAMFileHeader.SortOrder.coordinate); - - ReadGroupIterator itRg = run.getReadGroups(); - while (itRg.nextReadGroup()) { - String rgName = itRg.getName(); - if (rgName.isEmpty()) rgName = runName; - SAMReadGroupRecord rg = new SAMReadGroupRecord(rgName); - rg.setSample(runName); - header.addReadGroup(rg); - } - - ReferenceIterator itRef = run.getReferences(); - while (itRef.nextReference()) { - header.addSequence(new SAMSequenceRecord(itRef.getCanonicalName(), (int) itRef.getLength())); - } - - return header; - } - - private BAMFileSpan getSpanContained(int sequenceIndex, long start, long end) { - if (recordRangeInfo.getTotalRecordRangeLength() <= 0) { - throw new RuntimeException("Cannot create file span - SRA file is empty"); - } - - long sequenceOffset = recordRangeInfo.getReferenceOffsets().get(sequenceIndex); - long sequenceLength = recordRangeInfo.getReferenceLengthsAligned().get(sequenceIndex); - if (end == -1) { - end = sequenceLength; - } - - if (start > sequenceLength) { - throw new IllegalArgumentException("Sequence start position is larger than its length"); - } - - if (end > sequenceLength) { - throw new IllegalArgumentException("Sequence end position is larger than its length"); - } - - return new BAMFileSpan(new Chunk(sequenceOffset + start, sequenceOffset + end)); - } -} diff --git a/src/main/java/htsjdk/samtools/SRAIndex.java b/src/main/java/htsjdk/samtools/SRAIndex.java deleted file mode 100644 index d546aada7b..0000000000 --- a/src/main/java/htsjdk/samtools/SRAIndex.java +++ /dev/null @@ -1,259 +0,0 @@ -/*=========================================================================== - * - * PUBLIC DOMAIN NOTICE - * National Center for Biotechnology Information - * - * This software/database is a "United States Government Work" under the - * terms of the United States Copyright Act. It was written as part of - * the author's official duties as a United States Government employee and - * thus cannot be copyrighted. This software/database is freely available - * to the public for use. The National Library of Medicine and the U.S. - * Government have not placed any restriction on its use or reproduction. - * - * Although all reasonable efforts have been taken to ensure the accuracy - * and reliability of the software and data, the NLM and the U.S. - * Government do not and cannot warrant the performance or results that - * may be obtained by using this software or data. The NLM and the U.S. - * Government disclaim all warranties, express or implied, including - * warranties of performance, merchantability or fitness for any particular - * purpose. - * - * Please cite the author in any work or product based on this material. - * - * =========================================================================== - * - */ - -package htsjdk.samtools; - -import java.util.ArrayList; -import java.util.BitSet; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -/** - * Emulates BAM index so that we can request chunks of records from SRAFileReader - * - * Here is how it works: - * SRA allows reading of alignments by Reference position fast, so we divide our "file" range for alignments as - * a length of all references. Reading unaligned reads is then fast if we use read positions for lookup and (internally) - * filter out aligned fragments. - * - * Total SRA "file" range is calculated as sum of all reference lengths plus number of reads (both aligned and unaligned) - * in SRA archive. - * - * Now, we can use Chunks to lookup for aligned and unaligned fragments. - * - * We emulate BAM index bins by mapping SRA reference positions to bin numbers. - * And then we map from bin number to list of chunks, which represent SRA "file" positions (which are simply reference - * positions). - * - * We only emulate last level of BAM index bins (and they refer to a portion of reference SRA_BIN_SIZE bases long). - * For all other bins RuntimeException will be returned (but since nobody else creates bins, except SRAIndex class - * that is fine). - * - * But since the last level of bins was not meant to refer to fragments that only partially overlap bin reference - * positions, we also return chunk that goes 5000 bases left before beginning of the bin to assure fragments that - * start before the bin positions but still overlap with it can be retrieved by SRA reader. - * Later we will add support to NGS API to get a maximum number of bases that we need to go left to retrieve such fragments. - * - * Created by andrii.nikitiuk on 9/4/15. - */ -public class SRAIndex implements BrowseableBAMIndex { - /** - * Number of reference bases bins in last level can represent - */ - public static final int SRA_BIN_SIZE = 16 * 1024; - - /** - * Chunks of that size will be created when using SRA index - */ - public static final int SRA_CHUNK_SIZE = 50000; - - /** - * First bin number in last level - */ - private static final int SRA_BIN_INDEX_OFFSET = - GenomicIndexUtil.LEVEL_STARTS[GenomicIndexUtil.LEVEL_STARTS.length - 1]; - - /** - * How many bases should we go left on the reference to find all fragments that start before requested interval - * but overlap with it - */ - private static final int MAX_FRAGMENT_OVERLAP = 5000; - - private SAMFileHeader header; - private SRAIterator.RecordRangeInfo recordRangeInfo; - - /** - * @param header sam header - * @param recordRangeInfo info about record ranges withing SRA archive - */ - public SRAIndex(SAMFileHeader header, SRAIterator.RecordRangeInfo recordRangeInfo) { - this.header = header; - this.recordRangeInfo = recordRangeInfo; - } - - /** - * Gets the size (number of bins in) a given level of a BAM index. - * @param levelNumber Level for which to inspect the size. - * @return Size of the given level. - */ - @Override - public int getLevelSize(int levelNumber) { - if (levelNumber == GenomicIndexUtil.LEVEL_STARTS.length - 1) - return GenomicIndexUtil.MAX_BINS - GenomicIndexUtil.LEVEL_STARTS[levelNumber] - 1; - else return GenomicIndexUtil.LEVEL_STARTS[levelNumber + 1] - GenomicIndexUtil.LEVEL_STARTS[levelNumber]; - } - - /** - * SRA only operates on bins from last level - * @param bin The bin for which to determine the level. - * @return bin level - */ - @Override - public int getLevelForBin(Bin bin) { - if (bin.getBinNumber() < SRA_BIN_INDEX_OFFSET) { - throw new RuntimeException("SRA only supports bins from the last level"); - } - return GenomicIndexUtil.LEVEL_STARTS.length - 1; - } - - /** - * Gets the first locus that this bin can index into. - * @param bin The bin to test. - * @return first position that associated with given bin number - */ - @Override - public int getFirstLocusInBin(Bin bin) { - if (bin.getBinNumber() < SRA_BIN_INDEX_OFFSET) { - throw new RuntimeException("SRA only supports bins from the last level"); - } - - return (bin.getBinNumber() - SRA_BIN_INDEX_OFFSET) * SRA_BIN_SIZE + 1; - } - - /** - * Gets the last locus that this bin can index into. - * @param bin The bin to test. - * @return last position that associated with given bin number - */ - @Override - public int getLastLocusInBin(Bin bin) { - if (bin.getBinNumber() < SRA_BIN_INDEX_OFFSET) { - throw new RuntimeException("SRA only supports bins from the last level"); - } - - return (bin.getBinNumber() - SRA_BIN_INDEX_OFFSET + 1) * SRA_BIN_SIZE; - } - - /** - * Provides a list of bins that contain bases at requested positions - * @param referenceIndex sequence of desired SAMRecords - * @param startPos 1-based start of the desired interval, inclusive - * @param endPos 1-based end of the desired interval, inclusive - * @return a list of bins that contain relevant data - */ - @Override - public BinList getBinsOverlapping(int referenceIndex, int startPos, int endPos) { - long refLength = recordRangeInfo.getReferenceLengthsAligned().get(referenceIndex); - - // convert to chunk address space within reference - long refStartPos = startPos - 1; - long refEndPos = endPos; - if (refEndPos >= refLength) { - throw new RuntimeException("refEndPos is larger than reference length"); - } - - int firstBinNumber = (int) refStartPos / SRA_BIN_SIZE; - int lastBinNumber = (int) (refEndPos - 1) / SRA_BIN_SIZE; - - int numberOfBins = ((int) refLength / SRA_BIN_SIZE) + 1; - - BitSet binBitSet = new BitSet(); - binBitSet.set(0, SRA_BIN_INDEX_OFFSET, false); - if (firstBinNumber > 0) { - binBitSet.set(SRA_BIN_INDEX_OFFSET, SRA_BIN_INDEX_OFFSET + firstBinNumber, false); - } - binBitSet.set(SRA_BIN_INDEX_OFFSET + firstBinNumber, SRA_BIN_INDEX_OFFSET + lastBinNumber + 1, true); - if (lastBinNumber + 1 < numberOfBins) { - binBitSet.set(SRA_BIN_INDEX_OFFSET + lastBinNumber + 1, SRA_BIN_INDEX_OFFSET + numberOfBins, false); - } - - return new BinList(referenceIndex, binBitSet); - } - - @Override - public BAMFileSpan getSpanOverlapping(Bin bin) { - return new BAMFileSpan(getBinChunks(bin)); - } - - @Override - public BAMFileSpan getSpanOverlapping(int referenceIndex, int startPos, int endPos) { - BinList binList = getBinsOverlapping(referenceIndex, startPos, endPos); - BAMFileSpan result = new BAMFileSpan(); - Set savedChunks = new HashSet(); - for (Bin bin : binList) { - List chunks = getSpanOverlapping(bin).getChunks(); - for (Chunk chunk : chunks) { - if (!savedChunks.contains(chunk)) { - savedChunks.add(chunk); - result.add(chunk); - } - } - } - - return result; - } - - /** - * @return a position where aligned fragments end - */ - @Override - public long getStartOfLastLinearBin() { - int numberOfReferences = recordRangeInfo.getReferenceLengthsAligned().size(); - long refOffset = recordRangeInfo.getReferenceOffsets().get(numberOfReferences - 1); - long lastChunkNumber = - recordRangeInfo.getReferenceLengthsAligned().get(numberOfReferences - 1) / SRA_CHUNK_SIZE; - return lastChunkNumber * SRA_CHUNK_SIZE + refOffset; - } - - @Override - public BAMIndexMetaData getMetaData(int reference) { - throw new UnsupportedOperationException("Getting of BAM index metadata for SRA is not implemented"); - } - - @Override - public void close() {} - - /** - * @param bin Requested bin - * @return chunks that represent all bases of requested bin - */ - private List getBinChunks(Bin bin) { - if (bin.containsChunks()) { - return bin.getChunkList(); - } - - if (bin.getBinNumber() < SRA_BIN_INDEX_OFFSET) { - throw new RuntimeException("SRA only supports bins from the last level"); - } - int binNumber = bin.getBinNumber() - SRA_BIN_INDEX_OFFSET; - long refOffset = recordRangeInfo.getReferenceOffsets().get(bin.getReferenceSequence()); - - // move requested position MAX_FRAGMENT_OVERLAP bases behind, so that we take all the reads that overlap - // requested position - int firstChunkCorrection = binNumber == 0 ? 0 : -MAX_FRAGMENT_OVERLAP; - - long binGlobalOffset = binNumber * SRA_BIN_SIZE + refOffset; - long firstChunkNumber = (binGlobalOffset + firstChunkCorrection) / SRA_CHUNK_SIZE; - long lastChunkNumber = (binGlobalOffset + SRA_BIN_SIZE - 1) / SRA_CHUNK_SIZE; - List chunks = new ArrayList(); - for (long chunkNumber = firstChunkNumber; chunkNumber <= lastChunkNumber; chunkNumber++) { - chunks.add(new Chunk(chunkNumber * SRA_CHUNK_SIZE, (chunkNumber + 1) * SRA_CHUNK_SIZE)); - } - - return chunks; - } -} diff --git a/src/main/java/htsjdk/samtools/SRAIterator.java b/src/main/java/htsjdk/samtools/SRAIterator.java deleted file mode 100644 index 026a6bf500..0000000000 --- a/src/main/java/htsjdk/samtools/SRAIterator.java +++ /dev/null @@ -1,263 +0,0 @@ -/*=========================================================================== - * - * PUBLIC DOMAIN NOTICE - * National Center for Biotechnology Information - * - * This software/database is a "United States Government Work" under the - * terms of the United States Copyright Act. It was written as part of - * the author's official duties as a United States Government employee and - * thus cannot be copyrighted. This software/database is freely available - * to the public for use. The National Library of Medicine and the U.S. - * Government have not placed any restriction on its use or reproduction. - * - * Although all reasonable efforts have been taken to ensure the accuracy - * and reliability of the software and data, the NLM and the U.S. - * Government do not and cannot warrant the performance or results that - * may be obtained by using this software or data. The NLM and the U.S. - * Government disclaim all warranties, express or implied, including - * warranties of performance, merchantability or fitness for any particular - * purpose. - * - * Please cite the author in any work or product based on this material. - * - * =========================================================================== - * - */ - -/** - * Created by andrii.nikitiuk on 8/11/15. - */ -package htsjdk.samtools; - -import htsjdk.samtools.SAMFileHeader.SortOrder; -import htsjdk.samtools.sra.ReferenceCache; -import htsjdk.samtools.sra.SRAAccession; -import htsjdk.samtools.sra.SRAAlignmentIterator; -import htsjdk.samtools.sra.SRAUnalignmentIterator; -import htsjdk.samtools.sra.SRAUtils; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; -import ngs.ErrorMsg; -import ngs.ReadCollection; - -/** - * SRA iterator which returns SAMRecords for requested list of chunks - */ -public class SRAIterator implements SAMRecordIterator { - private ValidationStringency validationStringency; - - private SRAAccession accession; - private ReadCollection run; - private SAMFileHeader header; - private ReferenceCache cachedReferences; - private RecordRangeInfo recordRangeInfo; - private Iterator chunksIterator; - private Chunk currentChunk; - - private SRAAlignmentIterator alignmentIterator; - private SRAUnalignmentIterator unalignmentIterator; - - /** - * Describes record ranges info needed for emulating BAM index - */ - public static class RecordRangeInfo { - private List referenceOffsets; - private List referenceLengthsAligned; - private long totalReferencesLength; - private long numberOfReads; // is used for unaligned read space - private long totalRecordRangeLength; - - /** - * @param referenceLengthsAligned a list with lengths of each reference - * @param numberOfReads total number of reads within SRA archive - */ - public RecordRangeInfo(List referenceLengthsAligned, long numberOfReads) { - this.numberOfReads = numberOfReads; - this.referenceLengthsAligned = referenceLengthsAligned; - - referenceOffsets = new ArrayList(); - - totalReferencesLength = 0; - for (Long refLen : referenceLengthsAligned) { - referenceOffsets.add(totalReferencesLength); - totalReferencesLength += refLen; - } - - totalRecordRangeLength = totalReferencesLength + this.numberOfReads; - } - - public long getNumberOfReads() { - return numberOfReads; - } - - public long getTotalReferencesLength() { - return totalReferencesLength; - } - - public long getTotalRecordRangeLength() { - return totalRecordRangeLength; - } - - public final List getReferenceOffsets() { - return Collections.unmodifiableList(referenceOffsets); - } - - public final List getReferenceLengthsAligned() { - return Collections.unmodifiableList(referenceLengthsAligned); - } - } - - /** - * Loads record ranges needed for emulating BAM index - * @param run read collection - * @return record ranges - */ - public static RecordRangeInfo getRecordsRangeInfo(ReadCollection run) { - try { - return new RecordRangeInfo(SRAUtils.getReferencesLengthsAligned(run), SRAUtils.getNumberOfReads(run)); - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - - /** - * @param run opened read collection - * @param header sam header - * @param cachedReferences list of cached references shared among all iterators from a single SRAFileReader - * @param recordRangeInfo info about record ranges withing SRA archive - * @param chunks used to determine which records the iterator should return - */ - public SRAIterator( - SRAAccession accession, - final ReadCollection run, - final SAMFileHeader header, - ReferenceCache cachedReferences, - final RecordRangeInfo recordRangeInfo, - final List chunks) { - this.accession = accession; - this.run = run; - this.header = header; - this.cachedReferences = cachedReferences; - this.recordRangeInfo = recordRangeInfo; - chunksIterator = chunks.iterator(); - if (chunksIterator.hasNext()) { - currentChunk = chunksIterator.next(); - } - - hasNext(); - } - - /** - * NGS iterators implement a single method "nextObject" which return true if the operation was successful or - * false if there are no more objects available. - * That means that there is no way to check "hasNext" without actually moving the iterator forward. - * Because of that all the logic of moving iterator forward is actually happens in "hasNext". - * - * Here is explanation of how it works: - * Iterator holds a list of chunks of requested records. Here we have chunksIterator that walks though that list. - * We walk though that list using chunksIterator. If current chunk can represent aligned fragments then we create - * SRAAlignmentIterator iterator, pass the chunk into it and ask if it can find any record. If record was found, - * we say that we have next; otherwise we check if the chunk can represent unaligned fragments and then create - * SRAUnalignmentIterator if so and do the same steps as with alignemnt iterator. - * - * If record was not found in both SRAAlignmentIterator and SRAUnalignmentIterator (it is possible that reference - * range has no alignments or that reads range has all aligned fragment), we try the next chunk. - * - * When there are no more chunks and both iterators have no more records we return false. - * - * @return true if there are more records available - */ - @Override - public boolean hasNext() { - while (currentChunk != null) { - if (alignmentIterator == null) { - if (currentChunk.getChunkStart() < recordRangeInfo.getTotalReferencesLength()) { - alignmentIterator = new SRAAlignmentIterator( - accession, run, header, cachedReferences, recordRangeInfo, currentChunk); - if (validationStringency != null) { - alignmentIterator.setValidationStringency(validationStringency); - } - } - } - - if (alignmentIterator != null && alignmentIterator.hasNext()) { - return true; - } - - if (unalignmentIterator == null) { - if (currentChunk.getChunkEnd() > recordRangeInfo.getTotalReferencesLength()) { - unalignmentIterator = - new SRAUnalignmentIterator(accession, run, header, recordRangeInfo, currentChunk); - if (validationStringency != null) { - unalignmentIterator.setValidationStringency(validationStringency); - } - } - } - if (unalignmentIterator != null && unalignmentIterator.hasNext()) { - return true; - } - - if (alignmentIterator != null) { - alignmentIterator.close(); - } - alignmentIterator = null; - unalignmentIterator = null; - if (chunksIterator.hasNext()) { - currentChunk = chunksIterator.next(); - } else { - currentChunk = null; - } - } - return false; - } - - /** - * Call hasNext to make sure that one of inner iterators points to the next record, the retrieve the record from - * one of them. - * @return lazy SRA record - */ - @Override - public SAMRecord next() { - if (!hasNext()) { - throw new NoSuchElementException("No more records are available in SRAIterator"); - } - - if (alignmentIterator != null && alignmentIterator.hasNext()) { - return alignmentIterator.next(); - } - - return unalignmentIterator.next(); - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Removal of records not implemented."); - } - - @Override - public void close() { - if (alignmentIterator != null) { - alignmentIterator.close(); - alignmentIterator = null; - } - } - - @Override - public SAMRecordIterator assertSorted(final SortOrder sortOrder) { - throw new UnsupportedOperationException("assertSorted is not implemented."); - } - - public void setValidationStringency(ValidationStringency validationStringency) { - this.validationStringency = validationStringency; - - if (alignmentIterator != null) { - alignmentIterator.setValidationStringency(validationStringency); - } - if (unalignmentIterator != null) { - unalignmentIterator.setValidationStringency(validationStringency); - } - } -} diff --git a/src/main/java/htsjdk/samtools/SamInputResource.java b/src/main/java/htsjdk/samtools/SamInputResource.java index df854681a8..cb070d49bc 100644 --- a/src/main/java/htsjdk/samtools/SamInputResource.java +++ b/src/main/java/htsjdk/samtools/SamInputResource.java @@ -28,7 +28,6 @@ import htsjdk.samtools.seekablestream.SeekablePathStream; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.seekablestream.SeekableStreamFactory; -import htsjdk.samtools.sra.SRAAccession; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Lazy; import htsjdk.samtools.util.RuntimeIOException; @@ -131,10 +130,6 @@ public static SamInputResource of(final SeekableStream seekableStream) { return new SamInputResource(new SeekableStreamInputResource(seekableStream)); } - public static SamInputResource of(final SRAAccession acc) { - return new SamInputResource(new SRAInputResource(acc)); - } - /** * Creates a {@link SamInputResource} from a URI which may represent an htsget path, * or some other resource such as a filesystem path or a URL, with no index. @@ -241,7 +236,6 @@ enum Type { URL, SEEKABLE_STREAM, INPUT_STREAM, - SRA_ACCESSION, HTSGET } @@ -266,9 +260,6 @@ final Type type() { /** All resource types support {@link java.io.InputStream} generation. */ abstract InputStream asUnbufferedInputStream(); - /** SRA archive resource */ - abstract SRAAccession asSRAAccession(); - @Override public String toString() { final String childToString; @@ -288,9 +279,6 @@ public String toString() { case URL: childToString = asUrl().toString(); break; - case SRA_ACCESSION: - childToString = asSRAAccession().toString(); - break; case HTSGET: childToString = ((HtsgetInputResource) this).uri.toString(); break; @@ -363,11 +351,6 @@ public InputStream asUnbufferedInputStream() { } } } - - @Override - public SRAAccession asSRAAccession() { - return null; - } } class PathInputResource extends InputResource { @@ -428,11 +411,6 @@ public SeekableStream asUnbufferedSeekableStream() { public InputStream asUnbufferedInputStream() { return asUnbufferedSeekableStream(); } - - @Override - public SRAAccession asSRAAccession() { - return null; - } } class UrlInputResource extends InputResource { @@ -482,11 +460,6 @@ public SeekableStream asUnbufferedSeekableStream() { public InputStream asUnbufferedInputStream() { return asUnbufferedSeekableStream(); } - - @Override - public SRAAccession asSRAAccession() { - return null; - } } class SeekableStreamInputResource extends InputResource { @@ -522,11 +495,6 @@ SeekableStream asUnbufferedSeekableStream() { InputStream asUnbufferedInputStream() { return asUnbufferedSeekableStream(); } - - @Override - public SRAAccession asSRAAccession() { - return null; - } } class InputStreamInputResource extends InputResource { @@ -562,51 +530,6 @@ SeekableStream asUnbufferedSeekableStream() { InputStream asUnbufferedInputStream() { return inputStreamResource; } - - @Override - public SRAAccession asSRAAccession() { - return null; - } -} - -class SRAInputResource extends InputResource { - - final SRAAccession accession; - - SRAInputResource(final SRAAccession accession) { - super(Type.SRA_ACCESSION); - this.accession = accession; - } - - @Override - File asFile() { - return null; - } - - @Override - Path asPath() { - return null; - } - - @Override - URL asUrl() { - return null; - } - - @Override - SeekableStream asUnbufferedSeekableStream() { - return null; - } - - @Override - InputStream asUnbufferedInputStream() { - return null; - } - - @Override - public SRAAccession asSRAAccession() { - return accession; - } } // TODO: replace this with an InputResource type taking HtsPath once this interface is available, see @@ -648,9 +571,4 @@ SeekableStream asUnbufferedSeekableStream() { InputStream asUnbufferedInputStream() { return null; } - - @Override - SRAAccession asSRAAccession() { - return null; - } } diff --git a/src/main/java/htsjdk/samtools/SamReader.java b/src/main/java/htsjdk/samtools/SamReader.java index d288db464c..abe4905e76 100644 --- a/src/main/java/htsjdk/samtools/SamReader.java +++ b/src/main/java/htsjdk/samtools/SamReader.java @@ -81,7 +81,6 @@ public String toString() { } } - public static final Type SRA_TYPE = new TypeImpl("SRA", "sra", null); public static final Type CRAM_TYPE = new TypeImpl("CRAM", "cram", "crai"); public static final Type BAM_TYPE = new TypeImpl("BAM", "bam", "bai"); public static final Type SAM_TYPE = new TypeImpl("SAM", "sam", null); diff --git a/src/main/java/htsjdk/samtools/SamReaderFactory.java b/src/main/java/htsjdk/samtools/SamReaderFactory.java index ed2b3fb19b..e46b17725b 100644 --- a/src/main/java/htsjdk/samtools/SamReaderFactory.java +++ b/src/main/java/htsjdk/samtools/SamReaderFactory.java @@ -27,7 +27,6 @@ import htsjdk.samtools.cram.ref.CRAMReferenceSource; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.seekablestream.SeekableStream; -import htsjdk.samtools.sra.SRAAccession; import htsjdk.samtools.util.*; import htsjdk.samtools.util.zip.InflaterFactory; import java.io.File; @@ -382,8 +381,6 @@ public SamReader open(final SamInputResource resource) { validationStringency, this.samRecordFactory); } - } else if (type == InputResource.Type.SRA_ACCESSION) { - primitiveSamReader = new SRAFileReader(data.asSRAAccession()); } else { InputStream bufferedStream = IOUtil.maybeBufferInputStream( data.asUnbufferedInputStream(), @@ -467,11 +464,6 @@ public SamReader open(final SamInputResource resource) { primitiveSamReader = new CRAMFileReader(sourceFile, indexFile, referenceSource, validationStringency); } - } else if (sourceFile != null && isSra(sourceFile)) { - if (bufferedStream != null) { - bufferedStream.close(); - } - primitiveSamReader = new SRAFileReader(new SRAAccession(sourceFile.getPath())); } else { if (indexDefined) { bufferedStream.close(); @@ -496,19 +488,6 @@ public SamReader open(final SamInputResource resource) { } } - /** Attempts to detect whether the file is an SRA accessioned file. If SRA support is not available, returns false. */ - private boolean isSra(final File sourceFile) { - try { - // if SRA fails to initialize (the most common reason is a failure to find/load native libraries), - // it will throw a subclass of java.lang.Error and here we only catch subclasses of java.lang.Exception - // - // Note: SRA initialization errors should not be ignored, but rather shown to user - return SRAAccession.isValid(sourceFile.getPath()); - } catch (final Exception e) { - return false; - } - } - public static SamReaderFactory copyOf(final SamReaderFactoryImpl target) { return new SamReaderFactoryImpl( target.enabledOptions, @@ -541,11 +520,6 @@ void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) { underlyingReader.enableFileSource(reader, true); } - @Override - void applyTo(final SRAFileReader underlyingReader, final SamReader reader) { - underlyingReader.enableFileSource(reader, true); - } - @Override void applyTo(final HtsgetBAMFileReader underlyingReader, final SamReader reader) { underlyingReader.enableFileSource(reader, true); @@ -575,11 +549,6 @@ void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) { underlyingReader.enableIndexCaching(true); } - @Override - void applyTo(final SRAFileReader underlyingReader, final SamReader reader) { - underlyingReader.enableIndexCaching(true); - } - @Override void applyTo(final HtsgetBAMFileReader underlyingReader, final SamReader reader) { logDebugIgnoringOption(reader, this); @@ -609,11 +578,6 @@ void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) { underlyingReader.enableIndexMemoryMapping(false); } - @Override - void applyTo(final SRAFileReader underlyingReader, final SamReader reader) { - underlyingReader.enableIndexMemoryMapping(false); - } - @Override void applyTo(final HtsgetBAMFileReader underlyingReader, final SamReader reader) { logDebugIgnoringOption(reader, this); @@ -640,11 +604,6 @@ void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) { logDebugIgnoringOption(reader, this); } - @Override - void applyTo(final SRAFileReader underlyingReader, final SamReader reader) { - logDebugIgnoringOption(reader, this); - } - @Override void applyTo(final HtsgetBAMFileReader underlyingReader, final SamReader reader) { underlyingReader.setEagerDecode(true); @@ -671,11 +630,6 @@ void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) { logDebugIgnoringOption(reader, this); } - @Override - void applyTo(final SRAFileReader underlyingReader, final SamReader reader) { - logDebugIgnoringOption(reader, this); - } - @Override void applyTo(final HtsgetBAMFileReader underlyingReader, final SamReader reader) { underlyingReader.enableCrcChecking(true); @@ -693,8 +647,6 @@ void applyTo(final SamReader.PrimitiveSamReaderToSamReaderAdapter reader) { applyTo((SAMTextReader) underlyingReader, reader); } else if (underlyingReader instanceof CRAMFileReader) { applyTo((CRAMFileReader) underlyingReader, reader); - } else if (underlyingReader instanceof SRAFileReader) { - applyTo((SRAFileReader) underlyingReader, reader); } else if (underlyingReader instanceof HtsgetBAMFileReader) { applyTo((HtsgetBAMFileReader) underlyingReader, reader); } else { @@ -717,8 +669,6 @@ private static void logDebugIgnoringOption(final SamReader r, final Option optio abstract void applyTo(final CRAMFileReader underlyingReader, final SamReader reader); - abstract void applyTo(final SRAFileReader underlyingReader, final SamReader reader); - abstract void applyTo(final HtsgetBAMFileReader underlyingReader, final SamReader reader); } } diff --git a/src/main/java/htsjdk/samtools/sra/ReferenceCache.java b/src/main/java/htsjdk/samtools/sra/ReferenceCache.java deleted file mode 100644 index dcd7c8f196..0000000000 --- a/src/main/java/htsjdk/samtools/sra/ReferenceCache.java +++ /dev/null @@ -1,45 +0,0 @@ -package htsjdk.samtools.sra; - -import htsjdk.samtools.SAMFileHeader; -import ngs.ErrorMsg; -import ngs.ReadCollection; -import ngs.Reference; - -/** - * That is a thread-safe wrapper for a list of cache Reference objects. - * Those objects can be used from different threads without issues, however to load and save a Reference object, we - * need to acquire a lock. - * - * Created by andrii.nikitiuk on 10/28/15. - */ -public class ReferenceCache { - private ReadCollection run; - private SAMFileHeader virtualHeader; - private Reference cachedReference; - - public ReferenceCache(ReadCollection run, SAMFileHeader virtualHeader) { - this.run = run; - this.virtualHeader = virtualHeader; - } - - /** - * This method returns Reference objects by reference indexes in SAM header - * Those objects do not maintain thread safety - * - * @param referenceIndex reference index in - * @return a Reference object - */ - public Reference get(int referenceIndex) { - String contig = virtualHeader.getSequence(referenceIndex).getSequenceName(); - - try { - if (cachedReference == null || !cachedReference.getCanonicalName().equals(contig)) { - cachedReference = run.getReference(contig); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - - return cachedReference; - } -} diff --git a/src/main/java/htsjdk/samtools/sra/SRAAccession.java b/src/main/java/htsjdk/samtools/sra/SRAAccession.java deleted file mode 100644 index adbbf2a9fa..0000000000 --- a/src/main/java/htsjdk/samtools/sra/SRAAccession.java +++ /dev/null @@ -1,175 +0,0 @@ -/*=========================================================================== - * - * PUBLIC DOMAIN NOTICE - * National Center for Biotechnology Information - * - * This software/database is a "United States Government Work" under the - * terms of the United States Copyright Act. It was written as part of - * the author's official duties as a United States Government employee and - * thus cannot be copyrighted. This software/database is freely available - * to the public for use. The National Library of Medicine and the U.S. - * Government have not placed any restriction on its use or reproduction. - * - * Although all reasonable efforts have been taken to ensure the accuracy - * and reliability of the software and data, the NLM and the U.S. - * Government do not and cannot warrant the performance or results that - * may be obtained by using this software or data. The NLM and the U.S. - * Government disclaim all warranties, express or implied, including - * warranties of performance, merchantability or fitness for any particular - * purpose. - * - * Please cite the author in any work or product based on this material. - * - * =========================================================================== - * - */ - -package htsjdk.samtools.sra; - -import gov.nih.nlm.ncbi.ngs.NGS; -import gov.nih.nlm.ncbi.ngs.error.LibraryLoadError; -import htsjdk.samtools.Defaults; -import htsjdk.samtools.util.Log; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.Serializable; -import java.util.Arrays; - -/** - * Describes a single SRA accession for SRA read collection - * Also provides app string functionality and allows to check if working SRA is supported on the running platform - * - * Important: due to checks performed in SRAAccession.isValid(), we won't recognise any accessions other - * than ones that follow the pattern "^[SED]RR[0-9]{6,9}$", e.g. SRR000123 - */ -public class SRAAccession implements Serializable { - private static final Log log = Log.getInstance(SRAAccession.class); - - private static boolean noLibraryDownload; - private static boolean initTried = false; - private static String appVersionString = null; - private static final String defaultAppVersionString = "[unknown software]"; - private static final String htsJdkVersionString = "HTSJDK-NGS"; - - static final String REMOTE_ACCESSION_PATTERN = "^[SED]RR[0-9]{6,9}$"; - - private String acc; - - static { - noLibraryDownload = !Defaults.SRA_LIBRARIES_DOWNLOAD; - if (noLibraryDownload) { - System.setProperty("vdb.System.noLibraryDownload", "1"); - } - } - - /** - * Sets an app version string which will let SRA know which software uses it. - * @param appVersionString a string that describes running application - */ - public static void setAppVersionString(String appVersionString) { - SRAAccession.appVersionString = appVersionString; - } - - /** - * @return true if SRA successfully loaded native libraries and fully initialized, - * false otherwise - * @deprecated use {@link #checkIfInitialized} instead - */ - @Deprecated - public static boolean isSupported() { - return checkIfInitialized() == null; - } - - /** - * Tries to initialize SRA. Initialization error is saved during first call, - * all subsequent calls will return the same saved error or null. - * - * @return ExceptionInInitializerError if initialization failed, null if initialization was successful - */ - public static ExceptionInInitializerError checkIfInitialized() { - final ExceptionInInitializerError ngsInitError; - if (!initTried) { - log.debug("Initializing SRA module"); - ngsInitError = NGS.getInitializationError(); - if (ngsInitError != null) { - log.info("SRA initialization failed. Will not be able to read from SRA"); - } else { - NGS.setAppVersionString(getFullVersionString()); - } - initTried = true; - } else { - ngsInitError = NGS.getInitializationError(); - } - return ngsInitError; - } - - /** - * @param acc accession - * @return true if a string is a valid SRA accession - */ - public static boolean isValid(String acc) { - boolean looksLikeSRA = false; - File f = new File(acc); - if (f.isFile()) { - byte[] buffer = new byte[8]; - byte[] signature1 = "NCBI.sra".getBytes(); - byte[] signature2 = "NCBInenc".getBytes(); - - try (InputStream is = new FileInputStream(f)) { - int numRead = is.read(buffer); - - looksLikeSRA = numRead == buffer.length - && (Arrays.equals(buffer, signature1) || Arrays.equals(buffer, signature2)); - } catch (IOException e) { - looksLikeSRA = false; - } - } else if (f.exists()) { - // anything else local other than a file is not an SRA archive - looksLikeSRA = false; - } else { - looksLikeSRA = acc.toUpperCase().matches(REMOTE_ACCESSION_PATTERN); - } - - if (!looksLikeSRA) return false; - - final ExceptionInInitializerError initError = checkIfInitialized(); - if (initError != null) { - if (noLibraryDownload && initError instanceof LibraryLoadError) { - throw new LinkageError( - "Failed to load SRA native libraries and auto-download is disabled. " - + "Please re-run with JVM argument -Dsamjdk.sra_libraries_download=true to enable auto-download of native libraries", - initError); - } else { - throw initError; - } - } - - return NGS.isValid(acc); - } - - /** - * @param acc accession - */ - public SRAAccession(String acc) { - this.acc = acc; - } - - public String toString() { - return acc; - } - - /** - * @return true if contained string is an SRA accession - */ - public boolean isValid() { - return SRAAccession.isValid(acc); - } - - private static String getFullVersionString() { - String versionString = appVersionString == null ? defaultAppVersionString : appVersionString; - versionString += " through " + htsJdkVersionString; - return versionString; - } -} diff --git a/src/main/java/htsjdk/samtools/sra/SRAAlignmentIterator.java b/src/main/java/htsjdk/samtools/sra/SRAAlignmentIterator.java deleted file mode 100644 index 22035ae90f..0000000000 --- a/src/main/java/htsjdk/samtools/sra/SRAAlignmentIterator.java +++ /dev/null @@ -1,220 +0,0 @@ -/*=========================================================================== - * - * PUBLIC DOMAIN NOTICE - * National Center for Biotechnology Information - * - * This software/database is a "United States Government Work" under the - * terms of the United States Copyright Act. It was written as part of - * the author's official duties as a United States Government employee and - * thus cannot be copyrighted. This software/database is freely available - * to the public for use. The National Library of Medicine and the U.S. - * Government have not placed any restriction on its use or reproduction. - * - * Although all reasonable efforts have been taken to ensure the accuracy - * and reliability of the software and data, the NLM and the U.S. - * Government do not and cannot warrant the performance or results that - * may be obtained by using this software or data. The NLM and the U.S. - * Government disclaim all warranties, express or implied, including - * warranties of performance, merchantability or fitness for any particular - * purpose. - * - * Please cite the author in any work or product based on this material. - * - * =========================================================================== - * - */ - -package htsjdk.samtools.sra; - -import htsjdk.samtools.Chunk; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SRAIterator; -import htsjdk.samtools.ValidationStringency; -import htsjdk.samtools.util.CloseableIterator; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; -import ngs.Alignment; -import ngs.AlignmentIterator; -import ngs.ErrorMsg; -import ngs.ReadCollection; -import ngs.Reference; - -/** - * Iterator for aligned reads. - * Is used from SRAIterator. - * Created by andrii.nikitiuk on 9/3/15. - */ -public class SRAAlignmentIterator implements CloseableIterator { - private ValidationStringency validationStringency; - - private SRAAccession accession; - private ReadCollection run; - private SAMFileHeader header; - private ReferenceCache cachedReferences; - private List referencesLengths; - private Iterator referencesChunksIterator; - private int currentReference = -1; - - private boolean hasMoreReferences = true; - - private AlignmentIterator alignedIterator; - private Boolean hasMoreAlignments = false; - - private SRALazyRecord lastRecord; - - /** - * @param run opened read collection - * @param header sam header - * @param cachedReferences list of cached references shared among all iterators from a single SRAFileReader - * @param recordRangeInfo info about record ranges withing SRA archive - * @param chunk used to determine which alignments the iterator should return - */ - public SRAAlignmentIterator( - SRAAccession accession, - final ReadCollection run, - final SAMFileHeader header, - ReferenceCache cachedReferences, - final SRAIterator.RecordRangeInfo recordRangeInfo, - final Chunk chunk) { - this.accession = accession; - this.run = run; - this.header = header; - this.cachedReferences = cachedReferences; - this.referencesLengths = recordRangeInfo.getReferenceLengthsAligned(); - - referencesChunksIterator = getReferenceChunks(chunk).iterator(); - - try { - nextReference(); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - @Override - public boolean hasNext() { - // check aligned - if (lastRecord != null) { - lastRecord.detachFromIterator(); - lastRecord = null; - } - - if (hasMoreAlignments == null) { - try { - hasMoreAlignments = alignedIterator.nextAlignment(); - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - while (!hasMoreAlignments && hasMoreReferences) { - nextReference(); - } - - return hasMoreAlignments; - } - - @Override - public SAMRecord next() { - if (!hasNext()) { - throw new NoSuchElementException("No more alignments are available"); - } - - return nextAlignment(); - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Removal of records not implemented."); - } - - public void setValidationStringency(ValidationStringency validationStringency) { - this.validationStringency = validationStringency; - } - - private SAMRecord nextAlignment() { - try { - lastRecord = new SRALazyRecord( - header, - accession, - run, - alignedIterator, - alignedIterator.getReadId(), - alignedIterator.getAlignmentId()); - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - if (validationStringency != null) { - lastRecord.setValidationStringency(validationStringency); - } - - hasMoreAlignments = null; - - return lastRecord; - } - - private void nextReference() { - if (!hasMoreReferences) { - throw new NoSuchElementException("Cannot get next reference - already at last one"); - } - - try { - alignedIterator = null; - hasMoreAlignments = false; - - hasMoreReferences = referencesChunksIterator.hasNext(); - if (!hasMoreReferences) { - return; - } - - currentReference++; - Chunk refChunk = referencesChunksIterator.next(); - if (refChunk == null) { - return; - } - - Reference reference = cachedReferences.get(currentReference); - - alignedIterator = reference.getFilteredAlignmentSlice( - refChunk.getChunkStart(), - refChunk.getChunkEnd() - refChunk.getChunkStart(), - Alignment.all, - Alignment.startWithinSlice | Alignment.passDuplicates | Alignment.passFailed, - 0); - - hasMoreAlignments = alignedIterator.nextAlignment(); - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - - private List getReferenceChunks(final Chunk chunk) { - List referencesChunks = new ArrayList(); - long refOffset = 0; - for (Long refLen : referencesLengths) { - if (chunk.getChunkStart() - refOffset >= refLen || chunk.getChunkEnd() - refOffset <= 0) { - referencesChunks.add(null); - } else { - long refChunkStart = Math.max(chunk.getChunkStart() - refOffset, 0); - long refChunkEnd = Math.min(chunk.getChunkEnd() - refOffset, refLen); - referencesChunks.add(new Chunk(refChunkStart, refChunkEnd)); - } - - refOffset += refLen; - } - - return referencesChunks; - } - - @Override - public void close() { - if (lastRecord != null) { - lastRecord.detachFromIterator(); - lastRecord = null; - } - - alignedIterator = null; - } -} diff --git a/src/main/java/htsjdk/samtools/sra/SRAIndexedSequenceFile.java b/src/main/java/htsjdk/samtools/sra/SRAIndexedSequenceFile.java deleted file mode 100644 index 59b943e281..0000000000 --- a/src/main/java/htsjdk/samtools/sra/SRAIndexedSequenceFile.java +++ /dev/null @@ -1,115 +0,0 @@ -package htsjdk.samtools.sra; - -import htsjdk.samtools.SAMSequenceDictionary; -import htsjdk.samtools.SAMSequenceRecord; -import htsjdk.samtools.reference.ReferenceSequence; -import htsjdk.samtools.reference.ReferenceSequenceFile; -import java.io.IOException; -import java.util.Iterator; -import ngs.ErrorMsg; -import ngs.ReadCollection; -import ngs.Reference; -import ngs.ReferenceIterator; - -/** - * Allows reading Reference data from SRA - */ -public class SRAIndexedSequenceFile implements ReferenceSequenceFile { - private SRAAccession acc; - private ReadCollection run; - private Reference cachedReference; - - private Iterator sequenceRecordIterator; - - protected SAMSequenceDictionary sequenceDictionary; - - /** - * @param acc accession - */ - public SRAIndexedSequenceFile(SRAAccession acc) { - this.acc = acc; - - if (!acc.isValid()) { - throw new RuntimeException("Passed an invalid SRA accession into SRA reader: " + acc); - } - - try { - run = gov.nih.nlm.ncbi.ngs.NGS.openReadCollection(acc.toString()); - sequenceDictionary = loadSequenceDictionary(); - } catch (final ErrorMsg e) { - throw new RuntimeException(e); - } - - reset(); - } - - @Override - public SAMSequenceDictionary getSequenceDictionary() { - return sequenceDictionary; - } - - @Override - public ReferenceSequence nextSequence() { - SAMSequenceRecord sequence = sequenceRecordIterator.next(); - return getSubsequenceAt(sequence.getSequenceName(), 1L, sequence.getSequenceLength()); - } - - @Override - public void reset() { - sequenceRecordIterator = sequenceDictionary.getSequences().iterator(); - } - - @Override - public boolean isIndexed() { - return true; - } - - @Override - public ReferenceSequence getSequence(String contig) { - return getSubsequenceAt( - contig, 1L, sequenceDictionary.getSequence(contig).getSequenceLength()); - } - - @Override - public ReferenceSequence getSubsequenceAt(String contig, long start, long stop) { - SAMSequenceRecord sequence = sequenceDictionary.getSequence(contig); - int referenceIndex = sequence.getSequenceIndex(); - - byte[] bases; - - try { - Reference reference; - synchronized (this) { - if (cachedReference == null - || !cachedReference.getCanonicalName().equals(contig)) { - cachedReference = run.getReference(contig); - } - reference = cachedReference; - - bases = reference - .getReferenceBases(start - 1, stop - (start - 1)) - .getBytes(); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - - return new ReferenceSequence(contig, referenceIndex, bases); - } - - @Override - public void close() throws IOException { - cachedReference = null; - } - - protected SAMSequenceDictionary loadSequenceDictionary() throws ErrorMsg { - SAMSequenceDictionary dict = new SAMSequenceDictionary(); - - ReferenceIterator itRef = run.getReferences(); - while (itRef.nextReference()) { - dict.addSequence(new SAMSequenceRecord(itRef.getCanonicalName(), (int) itRef.getLength())); - } - - return dict; - } -} diff --git a/src/main/java/htsjdk/samtools/sra/SRALazyRecord.java b/src/main/java/htsjdk/samtools/sra/SRALazyRecord.java deleted file mode 100644 index 0087c71657..0000000000 --- a/src/main/java/htsjdk/samtools/sra/SRALazyRecord.java +++ /dev/null @@ -1,1053 +0,0 @@ -/*=========================================================================== - * - * PUBLIC DOMAIN NOTICE - * National Center for Biotechnology Information - * - * This software/database is a "United States Government Work" under the - * terms of the United States Copyright Act. It was written as part of - * the author's official duties as a United States Government employee and - * thus cannot be copyrighted. This software/database is freely available - * to the public for use. The National Library of Medicine and the U.S. - * Government have not placed any restriction on its use or reproduction. - * - * Although all reasonable efforts have been taken to ensure the accuracy - * and reliability of the software and data, the NLM and the U.S. - * Government do not and cannot warrant the performance or results that - * may be obtained by using this software or data. The NLM and the U.S. - * Government disclaim all warranties, express or implied, including - * warranties of performance, merchantability or fitness for any particular - * purpose. - * - * Please cite the author in any work or product based on this material. - * - * =========================================================================== - * - */ - -package htsjdk.samtools.sra; - -import gov.nih.nlm.ncbi.ngs.NGS; -import htsjdk.samtools.*; -import htsjdk.samtools.util.Log; -import java.util.EnumSet; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import ngs.Alignment; -import ngs.AlignmentIterator; -import ngs.ErrorMsg; -import ngs.Fragment; -import ngs.Read; -import ngs.ReadCollection; -import ngs.ReadIterator; - -/** - * Extends SAMRecord so that any of the fields will be loaded only when needed. - * Since SRA is a column oriented database, it is very inefficient to load all the fields at once. - * However, loading only set of actually needed fields will be even faster than in row oriented databases. - * - * Because of that we are providing lazy loading of fields, flags and attributes. - * - * Created by andrii.nikitiuk on 8/25/15. - */ -public class SRALazyRecord extends SAMRecord { - private static final Log log = Log.getInstance(SRALazyRecord.class); - - private SRAAccession accession; - private boolean isAligned; - private transient ReadCollection run; - private transient Alignment alignmentIterator; - private transient Read unalignmentIterator; - private String sraReadId; - private String sraAlignmentId; - private int unalignedReadFragmentIndex = -1; - - private Set initializedFields = EnumSet.noneOf(LazyField.class); - private Set initializedFlags = EnumSet.noneOf(LazyFlag.class); - private Set initializedAttributes = EnumSet.noneOf(LazyAttribute.class); - - private enum LazyField { - ALIGNMENT_START { - @Override - public void loadValue(SRALazyRecord self) { - self.getAlignmentStart(); - } - }, - MAPPING_QUALITY { - @Override - public void loadValue(SRALazyRecord self) { - self.getMappingQuality(); - } - }, - REFERENCE_NAME { - @Override - public void loadValue(SRALazyRecord self) { - self.getReferenceName(); - } - }, - CIGAR_STRING { - @Override - public void loadValue(SRALazyRecord self) { - self.getCigarString(); - } - }, - BASES { - @Override - public void loadValue(SRALazyRecord self) { - self.getReadBases(); - } - }, - QUALS { - @Override - public void loadValue(SRALazyRecord self) { - self.getBaseQualities(); - } - }, - MATE_ALIGNMENT_START { - @Override - public void loadValue(SRALazyRecord self) { - self.getMateAlignmentStart(); - } - }, - MATE_REFERENCE_NAME { - @Override - public void loadValue(SRALazyRecord self) { - self.getMateReferenceName(); - } - }, - INFERRED_INSERT_SIZE { - @Override - public void loadValue(SRALazyRecord self) { - self.getInferredInsertSize(); - } - }; - - public abstract void loadValue(SRALazyRecord self); - } - - private enum LazyFlag { - READ_NEGATIVE_STRAND(true) { - @Override - public boolean getFlag(SRALazyRecord self) { - return self.getReadNegativeStrandFlag(); - } - }, - READ_PAIRED(true) { - @Override - public boolean getFlag(SRALazyRecord self) { - return self.getReadPairedFlag(); - } - }, - PROPER_PAIR(false) { - @Override - public boolean getFlag(SRALazyRecord self) { - return self.getProperPairFlag(); - } - }, - SECONDARY_ALIGNMENT(true) { - @Override - public boolean getFlag(SRALazyRecord self) { - return self.isSecondaryAlignment(); - } - }, - MATE_NEGATIVE_STRAND(false) { - @Override - public boolean getFlag(SRALazyRecord self) { - return self.getMateNegativeStrandFlag(); - } - }, - MATE_UNMAPPED(false) { - @Override - public boolean getFlag(SRALazyRecord self) { - return self.getMateUnmappedFlag(); - } - }, - FIRST_OF_PAIR(false) { - @Override - public boolean getFlag(SRALazyRecord self) { - return self.getFirstOfPairFlag(); - } - }, - SECOND_OF_PAIR(false) { - @Override - public boolean getFlag(SRALazyRecord self) { - return self.getSecondOfPairFlag(); - } - }; - - private final boolean canCallOnNotPaired; - - LazyFlag(final boolean canCallOnNotPaired) { - this.canCallOnNotPaired = canCallOnNotPaired; - } - - public boolean canCallOnNotPaired() { - return canCallOnNotPaired; - } - - public abstract boolean getFlag(SRALazyRecord self); - } - - private enum LazyAttribute { - RG { - @Override - public String getAttribute(SRALazyRecord self) { - return self.getAttributeGroupNameImpl(); - } - }; - - public abstract String getAttribute(SRALazyRecord self); - } - - private static Map lazyAttributeTags; - - static { - lazyAttributeTags = new HashMap(); - lazyAttributeTags.put(SAMTag.RG.getBinaryTag(), LazyAttribute.RG); - } - - public SRALazyRecord( - final SAMFileHeader header, - SRAAccession accession, - ReadCollection run, - AlignmentIterator alignmentIterator, - String readId, - String alignmentId) { - this(header, accession, readId, alignmentId); - - this.run = run; - this.alignmentIterator = alignmentIterator; - } - - public SRALazyRecord( - final SAMFileHeader header, - SRAAccession accession, - ReadCollection run, - ReadIterator unalignmentIterator, - String readId, - int unalignedReadFragmentIndex) { - this(header, accession, readId, unalignedReadFragmentIndex); - - this.run = run; - this.unalignmentIterator = unalignmentIterator; - } - - protected SRALazyRecord(final SAMFileHeader header, SRAAccession accession, String readId, String alignmentId) { - this(header, accession, readId, true); - - this.sraAlignmentId = alignmentId; - } - - protected SRALazyRecord( - final SAMFileHeader header, SRAAccession accession, String readId, int unalignedReadFragmentIndex) { - this(header, accession, readId, false); - - this.unalignedReadFragmentIndex = unalignedReadFragmentIndex; - } - - private SRALazyRecord(final SAMFileHeader header, SRAAccession accession, String readId, boolean isAligned) { - super(header); - - this.accession = accession; - this.isAligned = isAligned; - this.sraReadId = readId; - setReadName(readId); - setReadUnmappedFlag(!isAligned); - } - - /** - * Is being called when original NGS iterator is being moved to the next object. - * Later, if any of uninitialized fields is requested, either Read object or Alignment has to be retrieved from - * ReadCollection - */ - public void detachFromIterator() { - alignmentIterator = null; - unalignmentIterator = null; - } - - // ===== fields ===== - - @Override - public int getAlignmentStart() { - if (!initializedFields.contains(LazyField.ALIGNMENT_START)) { - setAlignmentStart(getAlignmentStartImpl()); - } - return super.getAlignmentStart(); - } - - @Override - public void setAlignmentStart(final int value) { - if (!initializedFields.contains(LazyField.ALIGNMENT_START)) { - initializedFields.add(LazyField.ALIGNMENT_START); - } - super.setAlignmentStart(value); - } - - @Override - public int getMappingQuality() { - if (!initializedFields.contains(LazyField.MAPPING_QUALITY)) { - setMappingQuality(getMappingQualityImpl()); - } - return super.getMappingQuality(); - } - - @Override - public void setMappingQuality(final int value) { - if (!initializedFields.contains(LazyField.MAPPING_QUALITY)) { - initializedFields.add(LazyField.MAPPING_QUALITY); - } - super.setMappingQuality(value); - } - - @Override - public String getReferenceName() { - if (!initializedFields.contains(LazyField.REFERENCE_NAME)) { - setReferenceName(getReferenceNameImpl()); - } - return super.getReferenceName(); - } - - @Override - public void setReferenceName(final String value) { - if (!initializedFields.contains(LazyField.REFERENCE_NAME)) { - initializedFields.add(LazyField.REFERENCE_NAME); - } - super.setReferenceName(value); - } - - @Override - public Integer getReferenceIndex() { - if (!initializedFields.contains(LazyField.REFERENCE_NAME)) { - setReferenceName(getReferenceNameImpl()); - } - return super.getReferenceIndex(); - } - - @Override - public void setReferenceIndex(final int value) { - if (!initializedFields.contains(LazyField.REFERENCE_NAME)) { - initializedFields.add(LazyField.REFERENCE_NAME); - } - super.setReferenceIndex(value); - } - - @Override - public String getCigarString() { - if (!initializedFields.contains(LazyField.CIGAR_STRING)) { - setCigarString(getCigarStringImpl()); - } - return super.getCigarString(); - } - - @Override - public void setCigarString(final String value) { - if (!initializedFields.contains(LazyField.CIGAR_STRING)) { - initializedFields.add(LazyField.CIGAR_STRING); - } - super.setCigarString(value); - } - - @Override - public Cigar getCigar() { - if (!initializedFields.contains(LazyField.CIGAR_STRING)) { - setCigarString(getCigarStringImpl()); - } - return super.getCigar(); - } - - @Override - public void setCigar(final Cigar value) { - if (!initializedFields.contains(LazyField.CIGAR_STRING)) { - initializedFields.add(LazyField.CIGAR_STRING); - } - super.setCigar(value); - } - - @Override - public byte[] getReadBases() { - if (!initializedFields.contains(LazyField.BASES)) { - setReadBases(getReadBasesImpl()); - } - return super.getReadBases(); - } - - @Override - public void setReadBases(final byte[] value) { - if (!initializedFields.contains(LazyField.BASES)) { - initializedFields.add(LazyField.BASES); - } - super.setReadBases(value); - } - - @Override - public byte[] getBaseQualities() { - if (!initializedFields.contains(LazyField.QUALS)) { - setBaseQualities(getBaseQualitiesImpl()); - } - return super.getBaseQualities(); - } - - @Override - public void setBaseQualities(final byte[] value) { - if (!initializedFields.contains(LazyField.QUALS)) { - initializedFields.add(LazyField.QUALS); - } - super.setBaseQualities(value); - } - - @Override - public int getMateAlignmentStart() { - if (!initializedFields.contains(LazyField.MATE_ALIGNMENT_START)) { - setMateAlignmentStart(getMateAlignmentStartImpl()); - } - return super.getMateAlignmentStart(); - } - - @Override - public void setMateAlignmentStart(final int value) { - if (!initializedFields.contains(LazyField.MATE_ALIGNMENT_START)) { - initializedFields.add(LazyField.MATE_ALIGNMENT_START); - } - super.setMateAlignmentStart(value); - } - - @Override - public String getMateReferenceName() { - if (!initializedFields.contains(LazyField.MATE_REFERENCE_NAME)) { - setMateReferenceName(getMateReferenceNameImpl()); - } - return super.getMateReferenceName(); - } - - @Override - public void setMateReferenceName(final String value) { - if (!initializedFields.contains(LazyField.MATE_REFERENCE_NAME)) { - initializedFields.add(LazyField.MATE_REFERENCE_NAME); - } - super.setMateReferenceName(value); - } - - @Override - public Integer getMateReferenceIndex() { - if (!initializedFields.contains(LazyField.MATE_REFERENCE_NAME)) { - setMateReferenceName(getMateReferenceNameImpl()); - } - return super.getMateReferenceIndex(); - } - - @Override - public void setMateReferenceIndex(final int value) { - if (!initializedFields.contains(LazyField.MATE_REFERENCE_NAME)) { - initializedFields.add(LazyField.MATE_REFERENCE_NAME); - } - super.setMateReferenceIndex(value); - } - - @Override - public int getInferredInsertSize() { - if (!initializedFields.contains(LazyField.INFERRED_INSERT_SIZE)) { - setInferredInsertSize(getInferredInsertSizeImpl()); - } - return super.getInferredInsertSize(); - } - - @Override - public void setInferredInsertSize(final int value) { - if (!initializedFields.contains(LazyField.INFERRED_INSERT_SIZE)) { - initializedFields.add(LazyField.INFERRED_INSERT_SIZE); - } - super.setInferredInsertSize(value); - } - - // ===== flags ===== - - @Override - public int getFlags() { - for (LazyFlag flag : LazyFlag.values()) { - if (initializedFlags.contains(flag)) { - continue; - } - - if (flag.canCallOnNotPaired() || getReadPairedFlag()) { - flag.getFlag(this); - } - } - - return super.getFlags(); - } - - @Override - public void setFlags(final int value) { - for (LazyFlag flag : LazyFlag.values()) { - if (!initializedFlags.contains(flag)) { - initializedFlags.add(flag); - } - } - super.setFlags(value); - } - - @Override - public boolean getReadNegativeStrandFlag() { - if (!initializedFlags.contains(LazyFlag.READ_NEGATIVE_STRAND)) { - setReadNegativeStrandFlag(getReadNegativeStrandFlagImpl()); - } - return super.getReadNegativeStrandFlag(); - } - - @Override - public void setReadNegativeStrandFlag(final boolean flag) { - if (!initializedFlags.contains(LazyFlag.READ_NEGATIVE_STRAND)) { - initializedFlags.add(LazyFlag.READ_NEGATIVE_STRAND); - } - super.setReadNegativeStrandFlag(flag); - } - - @Override - public boolean getReadPairedFlag() { - if (!initializedFlags.contains(LazyFlag.READ_PAIRED)) { - setReadPairedFlag(getReadPairedFlagImpl()); - } - return super.getReadPairedFlag(); - } - - @Override - public void setReadPairedFlag(final boolean flag) { - if (!initializedFlags.contains(LazyFlag.READ_PAIRED)) { - initializedFlags.add(LazyFlag.READ_PAIRED); - } - super.setReadPairedFlag(flag); - } - - @Override - public boolean getProperPairFlag() { - if (!initializedFlags.contains(LazyFlag.PROPER_PAIR)) { - setProperPairFlag(getProperPairFlagImpl()); - } - return super.getProperPairFlag(); - } - - @Override - public void setProperPairFlag(final boolean flag) { - if (!initializedFlags.contains(LazyFlag.PROPER_PAIR)) { - initializedFlags.add(LazyFlag.PROPER_PAIR); - } - super.setProperPairFlag(flag); - } - - @Override - public boolean isSecondaryAlignment() { - if (!initializedFlags.contains(LazyFlag.SECONDARY_ALIGNMENT)) { - setSecondaryAlignment(getSecondaryAlignmentFlagImpl()); - } - return super.isSecondaryAlignment(); - } - - @Override - public void setSecondaryAlignment(final boolean flag) { - if (!initializedFlags.contains(LazyFlag.SECONDARY_ALIGNMENT)) { - initializedFlags.add(LazyFlag.SECONDARY_ALIGNMENT); - } - super.setSecondaryAlignment(flag); - } - - @Override - public boolean getMateNegativeStrandFlag() { - if (!initializedFlags.contains(LazyFlag.MATE_NEGATIVE_STRAND)) { - setMateNegativeStrandFlag(getMateNegativeStrandFlagImpl()); - } - return super.getMateNegativeStrandFlag(); - } - - @Override - public void setMateNegativeStrandFlag(final boolean flag) { - if (!initializedFlags.contains(LazyFlag.MATE_NEGATIVE_STRAND)) { - initializedFlags.add(LazyFlag.MATE_NEGATIVE_STRAND); - } - super.setMateNegativeStrandFlag(flag); - } - - @Override - public boolean getMateUnmappedFlag() { - if (!initializedFlags.contains(LazyFlag.MATE_UNMAPPED)) { - setMateUnmappedFlag(getMateUnmappedFlagImpl()); - } - return super.getMateUnmappedFlag(); - } - - @Override - public void setMateUnmappedFlag(final boolean flag) { - if (!initializedFlags.contains(LazyFlag.MATE_UNMAPPED)) { - initializedFlags.add(LazyFlag.MATE_UNMAPPED); - } - super.setMateUnmappedFlag(flag); - } - - @Override - public boolean getFirstOfPairFlag() { - if (!initializedFlags.contains(LazyFlag.FIRST_OF_PAIR)) { - setFirstOfPairFlag(getFirstOfPairFlagImpl()); - } - return super.getFirstOfPairFlag(); - } - - @Override - public void setFirstOfPairFlag(final boolean flag) { - if (!initializedFlags.contains(LazyFlag.FIRST_OF_PAIR)) { - initializedFlags.add(LazyFlag.FIRST_OF_PAIR); - } - super.setFirstOfPairFlag(flag); - } - - @Override - public boolean getSecondOfPairFlag() { - if (!initializedFlags.contains(LazyFlag.SECOND_OF_PAIR)) { - setSecondOfPairFlag(getSecondOfPairFlagImpl()); - } - return super.getSecondOfPairFlag(); - } - - @Override - public void setSecondOfPairFlag(final boolean flag) { - if (!initializedFlags.contains(LazyFlag.SECOND_OF_PAIR)) { - initializedFlags.add(LazyFlag.SECOND_OF_PAIR); - } - super.setSecondOfPairFlag(flag); - } - - // ===== attributes ===== - - @Override - public Object getAttribute(final short tag) { - LazyAttribute attr = lazyAttributeTags.get(tag); - if (attr != null) { - if (!initializedAttributes.contains(attr)) { - setAttribute(tag, attr.getAttribute(this)); - } - } - return super.getAttribute(tag); - } - - @Override - public void setAttribute(final short tag, final Object value) { - LazyAttribute attr = lazyAttributeTags.get(tag); - if (attr != null && !initializedAttributes.contains(attr)) { - initializedAttributes.add(attr); - } - super.setAttribute(tag, value); - } - - @Override - protected void setAttribute(final short tag, final Object value, final boolean isUnsignedArray) { - LazyAttribute attr = lazyAttributeTags.get(tag); - if (attr != null && !initializedAttributes.contains(attr)) { - initializedAttributes.add(attr); - } - super.setAttribute(tag, value, isUnsignedArray); - } - - @Override - public void clearAttributes() { - for (LazyAttribute lazyAttribute : LazyAttribute.values()) { - if (!initializedAttributes.contains(lazyAttribute)) { - initializedAttributes.add(lazyAttribute); - } - } - super.clearAttributes(); - } - - @Override - protected void setAttributes(final SAMBinaryTagAndValue attributes) { - for (LazyAttribute lazyAttribute : LazyAttribute.values()) { - if (!initializedAttributes.contains(lazyAttribute)) { - initializedAttributes.add(lazyAttribute); - } - } - super.setAttributes(attributes); - } - - @Override - protected SAMBinaryTagAndValue getBinaryAttributes() { - for (Map.Entry info : lazyAttributeTags.entrySet()) { - if (!initializedAttributes.contains(info.getValue())) { - getAttribute(info.getKey()); - } - } - - return super.getBinaryAttributes(); - } - - @Override - public boolean isUnsignedArrayAttribute(final String tag) { - Short binaryTag = SAMTag.makeBinaryTag(tag); - LazyAttribute attr = lazyAttributeTags.get(binaryTag); - if (attr != null && !initializedAttributes.contains(attr)) { - getAttribute(binaryTag); - } - - return super.isUnsignedArrayAttribute(tag); - } - - // ===== misc ==== - - /** - * For records equality, we should only compare read id, reference and position on the reference. - * Since read id is a constructor parameter, we only need to make sure that reference info is loaded. - * @param o other - * @return comparison result - */ - @Override - public boolean equals(final Object o) { - if (o instanceof SRALazyRecord) { - SRALazyRecord otherRecord = (SRALazyRecord) o; - otherRecord.getReferenceIndex(); - otherRecord.getAlignmentStart(); - } - - getReferenceIndex(); - getAlignmentStart(); - - return super.equals(o); - } - - /** - * The same approach as with 'equals' method. We only load reference and position. - */ - @Override - public int hashCode() { - getReferenceIndex(); - getAlignmentStart(); - - return super.hashCode(); - } - - /** - * Performs a deep copy of the SAMRecord and detaches a copy from NGS iterator - * @return new object - * @throws CloneNotSupportedException - */ - @Override - public Object clone() throws CloneNotSupportedException { - SRALazyRecord newObject = (SRALazyRecord) super.clone(); - newObject.initializedFields = EnumSet.copyOf(this.initializedFields); - newObject.initializedFlags = EnumSet.copyOf(this.initializedFlags); - newObject.initializedAttributes = EnumSet.copyOf(this.initializedAttributes); - newObject.detachFromIterator(); - - return newObject; - } - - @Override - public List isValid(final boolean firstOnly) { - loadFields(); - getFlags(); - getBinaryAttributes(); - - return super.isValid(firstOnly); - } - - // =============================== Implementation ======================================== - - private ReadCollection getReadCollection() { - if (run != null) { - return run; - } - - log.debug("Recovering SRA read collection. Accession: " + accession); - try { - return run = NGS.openReadCollection(accession.toString()); - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - - private Alignment getCurrentAlignment() throws ErrorMsg { - if (!isAligned) { - throw new RuntimeException("Should be called for aligned records only"); - } - - if (alignmentIterator == null) { - log.debug("Recovering SAM record after detaching from iterator. Alignment id: " + sraAlignmentId); - if (sraAlignmentId == null) { - throw new RuntimeException("Cannot recover SAM object after detaching from iterator: no alignment id"); - } - - alignmentIterator = getReadCollection().getAlignment(sraAlignmentId); - } - return alignmentIterator; - } - - private Read getCurrentUnalignedRead() throws ErrorMsg { - if (isAligned) { - throw new RuntimeException("Should be called for unaligned records only"); - } - - if (unalignmentIterator == null) { - log.debug("Recovering SAM record after detaching from iterator. Read id: " + sraReadId - + ", fragment index: " + unalignedReadFragmentIndex); - if (sraReadId == null) { - throw new RuntimeException("Cannot recover SAM object after detaching from iterator: no read id"); - } - - Read read = getReadCollection().getRead(sraReadId); - for (int i = 0; i < unalignedReadFragmentIndex + 1; i++) { - read.nextFragment(); - } - - unalignmentIterator = read; - } - return unalignmentIterator; - } - - // ===== fields ===== - - private void loadFields() { - for (LazyField field : LazyField.values()) { - if (initializedFields.contains(field)) { - continue; - } - - field.loadValue(this); - } - } - - private int getAlignmentStartImpl() { - try { - if (isAligned) { - return (int) getCurrentAlignment().getAlignmentPosition() + 1; - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - return SAMRecord.NO_ALIGNMENT_START; - } - - private int getMappingQualityImpl() { - try { - if (isAligned) { - return getCurrentAlignment().getMappingQuality(); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - return SAMRecord.NO_MAPPING_QUALITY; - } - - private String getReferenceNameImpl() { - try { - if (isAligned) { - return getCurrentAlignment().getReferenceSpec(); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - return SAMRecord.NO_ALIGNMENT_REFERENCE_NAME; - } - - private String getCigarStringImpl() { - try { - if (isAligned) { - return getCurrentAlignment().getShortCigar(false); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - return SAMRecord.NO_ALIGNMENT_CIGAR; - } - - private byte[] getReadBasesImpl() { - try { - if (isAligned) { - return getCurrentAlignment().getAlignedFragmentBases().getBytes(); - } else { - return getCurrentUnalignedRead().getFragmentBases().getBytes(); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - - private byte[] getBaseQualitiesImpl() { - try { - Fragment fragment; - if (isAligned) { - fragment = getCurrentAlignment(); - } else { - fragment = getCurrentUnalignedRead(); - } - - // quals are being taken from PRIMARY_ALIGNMENT.SAM_QUALITY column which reverse automatically them if - // needed - return SAMUtils.fastqToPhred(fragment.getFragmentQualities()); - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - - private int getMateAlignmentStartImpl() { - try { - if (isAligned && getReadPairedFlag() && !getMateUnmappedFlag()) { - Alignment mate = getCurrentAlignment().getMateAlignment(); - return (int) mate.getAlignmentPosition() + 1; - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - return SAMRecord.NO_ALIGNMENT_START; - } - - private String getMateReferenceNameImpl() { - try { - if (isAligned && getReadPairedFlag() && !getMateUnmappedFlag()) { - return getCurrentAlignment().getMateReferenceSpec(); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - return SAMRecord.NO_ALIGNMENT_REFERENCE_NAME; - } - - private int getInferredInsertSizeImpl() { - try { - if (isAligned) { - return (int) getCurrentAlignment().getTemplateLength(); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - return 0; - } - - // ===== flags ===== - - private boolean getReadNegativeStrandFlagImpl() { - try { - if (isAligned) { - return getCurrentAlignment().getIsReversedOrientation(); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - return false; - } - - private boolean getReadPairedFlagImpl() { - try { - if (isAligned) { - return getCurrentAlignment().isPaired(); - } else { - return getCurrentUnalignedRead().getNumFragments() > 1; - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - - private boolean getProperPairFlagImpl() { - return isAligned && getReadPairedFlag() && !getMateUnmappedFlag(); - } - - private boolean getSecondaryAlignmentFlagImpl() { - try { - if (isAligned) { - return getCurrentAlignment().getAlignmentCategory() == Alignment.secondaryAlignment; - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - - return false; - } - - private boolean getMateNegativeStrandFlagImpl() { - try { - if (isAligned && getReadPairedFlag() && !getMateUnmappedFlag()) { - Alignment mate = getCurrentAlignment().getMateAlignment(); - return mate.getIsReversedOrientation(); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - - return false; - } - - private boolean getMateUnmappedFlagImpl() { - try { - if (isAligned) { - return !getCurrentAlignment().hasMate(); - } else { - Read unalignedRead = getCurrentUnalignedRead(); - int numFragments = unalignedRead.getNumFragments(); - int nextFragmentIdx = unalignedReadFragmentIndex + 1; - if (nextFragmentIdx == numFragments) { - nextFragmentIdx = 0; - } - - return unalignedRead.fragmentIsAligned(nextFragmentIdx); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - - private boolean getFirstOfPairFlagImpl() { - if (!getReadPairedFlag()) { - return false; - } - try { - if (isAligned) { - String fragmentId = getCurrentAlignment().getFragmentId(); - if (!fragmentId.contains(".FA")) { - throw new RuntimeException("Invalid fragment id: " + fragmentId); - } - - return fragmentId.contains(".FA0."); - } else { - return unalignedReadFragmentIndex == 0; - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - - private boolean getSecondOfPairFlagImpl() { - if (!getReadPairedFlag()) { - return false; - } - try { - if (isAligned) { - String fragmentId = getCurrentAlignment().getFragmentId(); - if (!fragmentId.contains(".FA")) { - throw new RuntimeException("Invalid fragment id: " + fragmentId); - } - - return !fragmentId.contains(".FA0."); - } else { - return unalignedReadFragmentIndex != 0; - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - - // ===== attributes ===== - - private String getAttributeGroupNameImpl() { - try { - String readGroupName; - if (isAligned) { - readGroupName = getCurrentAlignment().getReadGroup(); - } else { - readGroupName = getCurrentUnalignedRead().getReadGroup(); - } - - if (!readGroupName.isEmpty()) { - return readGroupName; - } - return getReadCollection().getName(); - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } -} diff --git a/src/main/java/htsjdk/samtools/sra/SRAUnalignmentIterator.java b/src/main/java/htsjdk/samtools/sra/SRAUnalignmentIterator.java deleted file mode 100644 index 78c6513742..0000000000 --- a/src/main/java/htsjdk/samtools/sra/SRAUnalignmentIterator.java +++ /dev/null @@ -1,191 +0,0 @@ -/*=========================================================================== - * - * PUBLIC DOMAIN NOTICE - * National Center for Biotechnology Information - * - * This software/database is a "United States Government Work" under the - * terms of the United States Copyright Act. It was written as part of - * the author's official duties as a United States Government employee and - * thus cannot be copyrighted. This software/database is freely available - * to the public for use. The National Library of Medicine and the U.S. - * Government have not placed any restriction on its use or reproduction. - * - * Although all reasonable efforts have been taken to ensure the accuracy - * and reliability of the software and data, the NLM and the U.S. - * Government do not and cannot warrant the performance or results that - * may be obtained by using this software or data. The NLM and the U.S. - * Government disclaim all warranties, express or implied, including - * warranties of performance, merchantability or fitness for any particular - * purpose. - * - * Please cite the author in any work or product based on this material. - * - * =========================================================================== - * - */ - -package htsjdk.samtools.sra; - -import htsjdk.samtools.Chunk; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SRAIterator; -import htsjdk.samtools.ValidationStringency; -import java.util.Iterator; -import java.util.NoSuchElementException; -import ngs.ErrorMsg; -import ngs.Read; -import ngs.ReadCollection; -import ngs.ReadIterator; - -/** - * Iterator for unaligned reads. - * Is used from SRAIterator. - * - * Created by andrii.nikitiuk on 9/3/15. - */ -public class SRAUnalignmentIterator implements Iterator { - private ValidationStringency validationStringency; - - private SRAAccession accession; - private ReadCollection run; - private SAMFileHeader header; - private SRAIterator.RecordRangeInfo recordRangeInfo; - - private ReadIterator unalignedIterator; - private boolean hasMoreUnalignedReads = true; - private Boolean hasMoreUnalignedFragments = false; - private int lastUnalignedFragmentIndex; - - private SRALazyRecord lastRecord; - - /** - * - * @param run opened read collection - * @param header sam header - * @param recordRangeInfo info about record ranges withing SRA archive - * @param chunk used to determine which unaligned reads the iterator should return - */ - public SRAUnalignmentIterator( - SRAAccession accession, - final ReadCollection run, - final SAMFileHeader header, - SRAIterator.RecordRangeInfo recordRangeInfo, - Chunk chunk) { - this.accession = accession; - this.run = run; - this.header = header; - this.recordRangeInfo = recordRangeInfo; - - long readStart = chunk.getChunkStart() - recordRangeInfo.getTotalReferencesLength(); - if (readStart < 0) { - readStart = 0; - } else if (readStart >= recordRangeInfo.getNumberOfReads()) { - throw new RuntimeException("Invalid chunk provided: chunkStart position is after last read"); - } - - long readEnd = chunk.getChunkEnd() - recordRangeInfo.getTotalReferencesLength(); - if (readEnd > recordRangeInfo.getNumberOfReads()) { - readEnd = recordRangeInfo.getNumberOfReads(); - } else if (readEnd <= 0) { - throw new RuntimeException("Invalid chunk provided: chunkEnd position is before last read"); - } - - try { - unalignedIterator = - run.getReadRange(readStart + 1, readEnd - readStart, Read.partiallyAligned | Read.unaligned); - nextUnalignedFragment(); - - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - @Override - public boolean hasNext() { - // check unaligned - if (hasMoreUnalignedFragments == null) { - try { - lastRecord.detachFromIterator(); - nextUnalignedFragment(); - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - return hasMoreUnalignedFragments; - } - - @Override - public SAMRecord next() { - if (!hasNext()) { - throw new NoSuchElementException("No more alignments are available"); - } - - return nextUnalignment(); - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Removal of records not implemented."); - } - - public void setValidationStringency(ValidationStringency validationStringency) { - this.validationStringency = validationStringency; - } - - private SAMRecord nextUnalignment() { - try { - lastRecord = new SRALazyRecord( - header, - accession, - run, - unalignedIterator, - unalignedIterator.getReadId(), - lastUnalignedFragmentIndex); - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - - if (validationStringency != null) { - lastRecord.setValidationStringency(validationStringency); - } - - hasMoreUnalignedFragments = null; - - return lastRecord; - } - - private void nextUnalignedFragment() throws ErrorMsg { - while (hasMoreUnalignedFragments == null || hasMoreUnalignedFragments) { - hasMoreUnalignedFragments = unalignedIterator.nextFragment(); - lastUnalignedFragmentIndex++; - - if (hasMoreUnalignedFragments && !unalignedIterator.isAligned()) { - return; - } - } - - if (!hasMoreUnalignedReads) { - throw new RuntimeException("Cannot get next unaligned read - already at last one"); - } - - while (true) { - hasMoreUnalignedReads = unalignedIterator.nextRead(); - lastUnalignedFragmentIndex = -1; - if (!hasMoreUnalignedReads) { - break; - } - - // search for unaligned fragment - do { - hasMoreUnalignedFragments = unalignedIterator.nextFragment(); - lastUnalignedFragmentIndex++; - } while (hasMoreUnalignedFragments && unalignedIterator.isAligned()); - - // means that we found fragment - if (hasMoreUnalignedFragments) { - return; - } - } - } -} diff --git a/src/main/java/htsjdk/samtools/sra/SRAUtils.java b/src/main/java/htsjdk/samtools/sra/SRAUtils.java deleted file mode 100644 index 1107c15776..0000000000 --- a/src/main/java/htsjdk/samtools/sra/SRAUtils.java +++ /dev/null @@ -1,82 +0,0 @@ -/*=========================================================================== - * - * PUBLIC DOMAIN NOTICE - * National Center for Biotechnology Information - * - * This software/database is a "United States Government Work" under the - * terms of the United States Copyright Act. It was written as part of - * the author's official duties as a United States Government employee and - * thus cannot be copyrighted. This software/database is freely available - * to the public for use. The National Library of Medicine and the U.S. - * Government have not placed any restriction on its use or reproduction. - * - * Although all reasonable efforts have been taken to ensure the accuracy - * and reliability of the software and data, the NLM and the U.S. - * Government do not and cannot warrant the performance or results that - * may be obtained by using this software or data. The NLM and the U.S. - * Government disclaim all warranties, express or implied, including - * warranties of performance, merchantability or fitness for any particular - * purpose. - * - * Please cite the author in any work or product based on this material. - * - * =========================================================================== - * - */ - -package htsjdk.samtools.sra; - -import java.util.ArrayList; -import java.util.List; -import ngs.ErrorMsg; -import ngs.Read; -import ngs.ReadCollection; -import ngs.ReferenceIterator; - -/** - * Provides some functionality which can be used by other classes - * - * Created by andrii.nikitiuk on 10/28/15. - */ -public class SRAUtils { - /** - * References are stored in SRA table in chunks of 5k bases per row, while last chunk of a reference is less or - * equal than 5k bases in size (even if the next reference follows). - * So, it will be optimal if we align reference sizes to 5k bases to read by reference rows. - */ - public static final int REFERENCE_ALIGNMENT = 5000; - - /** - * Is used to build RecordRangeInfo - * @param run open read collection - * @return total number of reads (both aligned and unaligned) in SRA archive - * @throws ErrorMsg - */ - public static long getNumberOfReads(ReadCollection run) throws ErrorMsg { - return run.getReadCount(Read.all); - } - - /** - * Loads reference lengths from a read collection. - * Aligns reference lengths by REFERENCE_ALIGNMENT bases for optimal loads of alignments - * (references are stored in REFERENCE_ALIGNMENT bases chunks in SRA table) - * - * Is used to build RecordRangeInfo - * @param run single opened read collection - * @return list with references lengths - * @throws ErrorMsg - */ - public static List getReferencesLengthsAligned(ReadCollection run) throws ErrorMsg { - ReferenceIterator refIt = run.getReferences(); - List lengths = new ArrayList(); - while (refIt.nextReference()) { - long refLen = refIt.getLength(); - // lets optimize references so they always align in 5000 bases positions - if (refLen % REFERENCE_ALIGNMENT != 0) { - refLen += REFERENCE_ALIGNMENT - (refLen % REFERENCE_ALIGNMENT); - } - lengths.add(refLen); - } - return lengths; - } -} diff --git a/src/test/java/htsjdk/TestDataProviders.java b/src/test/java/htsjdk/TestDataProviders.java index 9697085bbf..d18b098f0c 100644 --- a/src/test/java/htsjdk/TestDataProviders.java +++ b/src/test/java/htsjdk/TestDataProviders.java @@ -38,7 +38,7 @@ public void independentTestOfDataProviderTest() throws Exception { } /** Groups that require external infrastructure and are excluded from the default test run. */ - private static final Set EXCLUDED_GROUPS = Set.of("htsget", "ftp", "http", "sra", "ena"); + private static final Set EXCLUDED_GROUPS = Set.of("htsget", "ftp", "http", "ena"); @DataProvider(name = "DataprovidersThatDontTestThemselves") private Iterator testAllDataProvidersData() throws Exception { diff --git a/src/test/java/htsjdk/samtools/SamReaderFactoryTest.java b/src/test/java/htsjdk/samtools/SamReaderFactoryTest.java index 8bf8da0c2c..d1a4ae6352 100644 --- a/src/test/java/htsjdk/samtools/SamReaderFactoryTest.java +++ b/src/test/java/htsjdk/samtools/SamReaderFactoryTest.java @@ -258,13 +258,11 @@ public void samRecordFactoryNullHeaderBAMTest() { public Object[][] composeAllPermutationsOfSamInputResource() { final List sources = new ArrayList<>(); for (final InputResource.Type dataType : InputResource.Type.values()) { - if (dataType.equals(InputResource.Type.SRA_ACCESSION) || dataType.equals(InputResource.Type.HTSGET)) - continue; + if (dataType.equals(InputResource.Type.HTSGET)) continue; sources.add(new SamInputResource(composeInputResourceForType(dataType, false))); for (final InputResource.Type indexType : InputResource.Type.values()) { - if (indexType.equals(InputResource.Type.SRA_ACCESSION) || indexType.equals(InputResource.Type.HTSGET)) - continue; + if (indexType.equals(InputResource.Type.HTSGET)) continue; sources.add(new SamInputResource( composeInputResourceForType(dataType, false), composeInputResourceForType(indexType, true))); diff --git a/src/test/java/htsjdk/samtools/SamReaderTest.java b/src/test/java/htsjdk/samtools/SamReaderTest.java index 7d9523c4c7..fa4f15bf1e 100644 --- a/src/test/java/htsjdk/samtools/SamReaderTest.java +++ b/src/test/java/htsjdk/samtools/SamReaderTest.java @@ -206,7 +206,6 @@ public Object[][] testHasValidFileExtensionTestData() { final Set setOfKnownFileTypes = new HashSet<>(); setOfKnownFileTypes.add(SamReader.Type.BAM_TYPE); setOfKnownFileTypes.add(SamReader.Type.SAM_TYPE); - setOfKnownFileTypes.add(SamReader.Type.SRA_TYPE); setOfKnownFileTypes.add(SamReader.Type.CRAM_TYPE); final List list = new ArrayList<>(); diff --git a/src/test/java/htsjdk/samtools/sra/AbstractSRATest.java b/src/test/java/htsjdk/samtools/sra/AbstractSRATest.java deleted file mode 100644 index c9efa3b22f..0000000000 --- a/src/test/java/htsjdk/samtools/sra/AbstractSRATest.java +++ /dev/null @@ -1,90 +0,0 @@ -package htsjdk.samtools.sra; - -import htsjdk.HtsjdkTest; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMRecordIterator; -import java.lang.reflect.Method; -import java.util.NoSuchElementException; -import org.testng.Assert; -import org.testng.SkipException; -import org.testng.annotations.BeforeGroups; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - -@Test(groups = "sra") -public abstract class AbstractSRATest extends HtsjdkTest { - private static boolean canResolveNetworkAccession = false; - private static String checkAccession = "SRR000123"; - - @BeforeGroups(groups = "sra") - public final void checkIfCanResolve() { - if (SRAAccession.checkIfInitialized() != null) { - return; - } - canResolveNetworkAccession = SRAAccession.isValid(checkAccession); - } - - @BeforeMethod(groups = "sra") - public final void assertSRAIsSupported() { - if (SRAAccession.checkIfInitialized() != null) { - throw new SkipException("Skipping SRA Test because SRA native code is unavailable."); - } - } - - @BeforeMethod(groups = "sra") - public final void skipIfCantResolve(Method method, Object[] params) { - String accession = null; - - if (params.length > 0) { - Object firstParam = params[0]; - if (firstParam instanceof String) { - accession = (String) firstParam; - } else if (firstParam instanceof SRAAccession) { - accession = firstParam.toString(); - } - } - - if (accession != null - && accession.matches(SRAAccession.REMOTE_ACCESSION_PATTERN) - && !canResolveNetworkAccession) { - throw new SkipException( - "Skipping network SRA Test because cannot resolve remote SRA accession '" + checkAccession + "'."); - } - } - - /** - * Exhaust the iterator and check that it produce the expected number of mapped and unmapped reads. - * Also checks that the hasNext() agrees with the actual results of next() for the given iterator. - * @param expectedNumMapped expected number of mapped reads, specify -1 to skip this check - * @param expectedNumUnmapped expected number of unmapped reads, specify -1 to skip this check - */ - static void assertCorrectCountsOfMappedAndUnmappedRecords( - SAMRecordIterator samRecordIterator, int expectedNumMapped, int expectedNumUnmapped) { - int numMapped = 0, numUnmapped = 0; - while (true) { - boolean hasRecord = samRecordIterator.hasNext(); - SAMRecord record; - try { - record = samRecordIterator.next(); - Assert.assertNotNull(record); - Assert.assertTrue(hasRecord); // exception is not thrown if we came to this point - } catch (final NoSuchElementException e) { - Assert.assertFalse(hasRecord); - break; - } - - if (record.getReadUnmappedFlag()) { - numUnmapped++; - } else { - numMapped++; - } - } - - if (expectedNumMapped != -1) { - Assert.assertEquals(numMapped, expectedNumMapped); - } - if (expectedNumUnmapped != -1) { - Assert.assertEquals(numUnmapped, expectedNumUnmapped); - } - } -} diff --git a/src/test/java/htsjdk/samtools/sra/SRAAccessionTest.java b/src/test/java/htsjdk/samtools/sra/SRAAccessionTest.java deleted file mode 100644 index 183cd30d19..0000000000 --- a/src/test/java/htsjdk/samtools/sra/SRAAccessionTest.java +++ /dev/null @@ -1,27 +0,0 @@ -package htsjdk.samtools.sra; - -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -/** - * Tests for SRAAccession logic - */ -public class SRAAccessionTest extends AbstractSRATest { - - @DataProvider(name = "isValidAccData") - private Object[][] getIsValidAccData() { - return new Object[][] { - {"SRR000123", true}, - {"DRR010511", true}, - {"src/test/resources/htsjdk/samtools/sra/test_archive.sra", true}, - {"src/test/resources/htsjdk/samtools/compressed.bam", false}, - {"src/test/resources/htsjdk/samtools/uncompressed.sam", false}, - }; - } - - @Test(dataProvider = "isValidAccData") - public void testIsValidAcc(String accession, boolean isValid) { - Assert.assertEquals(isValid, SRAAccession.isValid(accession)); - } -} diff --git a/src/test/java/htsjdk/samtools/sra/SRAIndexTest.java b/src/test/java/htsjdk/samtools/sra/SRAIndexTest.java deleted file mode 100644 index e85b826a69..0000000000 --- a/src/test/java/htsjdk/samtools/sra/SRAIndexTest.java +++ /dev/null @@ -1,163 +0,0 @@ -/*=========================================================================== - * - * PUBLIC DOMAIN NOTICE - * National Center for Biotechnology Information - * - * This software/database is a "United States Government Work" under the - * terms of the United States Copyright Act. It was written as part of - * the author's official duties as a United States Government employee and - * thus cannot be copyrighted. This software/database is freely available - * to the public for use. The National Library of Medicine and the U.S. - * Government have not placed any restriction on its use or reproduction. - * - * Although all reasonable efforts have been taken to ensure the accuracy - * and reliability of the software and data, the NLM and the U.S. - * Government do not and cannot warrant the performance or results that - * may be obtained by using this software or data. The NLM and the U.S. - * Government disclaim all warranties, express or implied, including - * warranties of performance, merchantability or fitness for any particular - * purpose. - * - * Please cite the author in any work or product based on this material. - * - * =========================================================================== - * - */ - -package htsjdk.samtools.sra; - -import htsjdk.samtools.BAMFileSpan; -import htsjdk.samtools.Bin; -import htsjdk.samtools.GenomicIndexUtil; -import htsjdk.samtools.SRAFileReader; -import htsjdk.samtools.SRAIndex; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Set; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -/** - * Unit tests for SRAIndex - * - * Created by andrii.nikitiuk on 10/28/15. - */ -public class SRAIndexTest extends AbstractSRATest { - private static final SRAAccession DEFAULT_ACCESSION = new SRAAccession("SRR2096940"); - private static final int LAST_BIN_LEVEL = GenomicIndexUtil.LEVEL_STARTS.length - 1; - private static final int SRA_BIN_OFFSET = GenomicIndexUtil.LEVEL_STARTS[LAST_BIN_LEVEL]; - - @Test - public void testLevelSize() { - final SRAIndex index = getIndex(DEFAULT_ACCESSION); - Assert.assertEquals(index.getLevelSize(0), GenomicIndexUtil.LEVEL_STARTS[1] - GenomicIndexUtil.LEVEL_STARTS[0]); - - Assert.assertEquals( - index.getLevelSize(LAST_BIN_LEVEL), - GenomicIndexUtil.MAX_BINS - GenomicIndexUtil.LEVEL_STARTS[LAST_BIN_LEVEL] - 1); - } - - @Test - public void testLevelForBin() { - final SRAIndex index = getIndex(DEFAULT_ACCESSION); - final Bin bin = new Bin(0, SRA_BIN_OFFSET); - Assert.assertEquals(index.getLevelForBin(bin), LAST_BIN_LEVEL); - } - - @DataProvider(name = "testBinLocuses") - private Object[][] createDataForBinLocuses() { - return new Object[][] { - {DEFAULT_ACCESSION, 0, 0, 1, SRAIndex.SRA_BIN_SIZE}, - {DEFAULT_ACCESSION, 0, 1, SRAIndex.SRA_BIN_SIZE + 1, SRAIndex.SRA_BIN_SIZE * 2} - }; - } - - @Test(dataProvider = "testBinLocuses") - public void testBinLocuses(SRAAccession acc, int reference, int binIndex, int firstLocus, int lastLocus) { - final SRAIndex index = getIndex(acc); - final Bin bin = new Bin(reference, SRA_BIN_OFFSET + binIndex); - - Assert.assertEquals(index.getFirstLocusInBin(bin), firstLocus); - Assert.assertEquals(index.getLastLocusInBin(bin), lastLocus); - } - - @DataProvider(name = "testBinOverlappings") - private Object[][] createDataForBinOverlappings() { - return new Object[][] { - {DEFAULT_ACCESSION, 0, 1, SRAIndex.SRA_BIN_SIZE, new HashSet<>(Arrays.asList(0))}, - {DEFAULT_ACCESSION, 0, SRAIndex.SRA_BIN_SIZE + 1, SRAIndex.SRA_BIN_SIZE * 2, new HashSet<>(Arrays.asList(1)) - }, - { - DEFAULT_ACCESSION, - 0, - SRAIndex.SRA_BIN_SIZE + 1, - SRAIndex.SRA_BIN_SIZE * 3, - new HashSet<>(Arrays.asList(1, 2)) - }, - { - DEFAULT_ACCESSION, - 0, - SRAIndex.SRA_BIN_SIZE * 2, - SRAIndex.SRA_BIN_SIZE * 2 + 1, - new HashSet<>(Arrays.asList(1, 2)) - } - }; - } - - @Test(dataProvider = "testBinOverlappings") - public void testBinOverlappings( - SRAAccession acc, int reference, int firstLocus, int lastLocus, Set binNumbers) { - final SRAIndex index = getIndex(acc); - final Iterator binIterator = - index.getBinsOverlapping(reference, firstLocus, lastLocus).iterator(); - final Set binNumbersFromIndex = new HashSet<>(); - while (binIterator.hasNext()) { - final Bin bin = binIterator.next(); - binNumbersFromIndex.add(bin.getBinNumber() - SRA_BIN_OFFSET); - } - - Assert.assertEquals(binNumbers, binNumbersFromIndex); - } - - @DataProvider(name = "testSpanOverlappings") - private Object[][] createDataForSpanOverlappings() { - return new Object[][] { - {DEFAULT_ACCESSION, 0, 1, SRAIndex.SRA_BIN_SIZE, new long[] {0, SRAIndex.SRA_CHUNK_SIZE}}, - { - DEFAULT_ACCESSION, - 0, - SRAIndex.SRA_BIN_SIZE * 2, - SRAIndex.SRA_BIN_SIZE * 2 + 1, - new long[] {0, SRAIndex.SRA_CHUNK_SIZE} - }, - { - DEFAULT_ACCESSION, - 0, - SRAIndex.SRA_CHUNK_SIZE, - SRAIndex.SRA_CHUNK_SIZE + 1, - new long[] {0, SRAIndex.SRA_CHUNK_SIZE, SRAIndex.SRA_CHUNK_SIZE, SRAIndex.SRA_CHUNK_SIZE * 2} - }, - }; - } - - @Test(dataProvider = "testSpanOverlappings") - public void testSpanOverlappings( - SRAAccession acc, int reference, int firstLocus, int lastLocus, long[] spanCoordinates) { - final SRAIndex index = getIndex(acc); - final BAMFileSpan span = index.getSpanOverlapping(reference, firstLocus, lastLocus); - - long[] coordinatesFromIndex = span.toCoordinateArray(); - - Assert.assertTrue( - Arrays.equals(coordinatesFromIndex, spanCoordinates), - "Coordinates mismatch. Expected: " + Arrays.toString(spanCoordinates) + " but was : " - + Arrays.toString(coordinatesFromIndex)); - } - - private SRAIndex getIndex(SRAAccession acc) { - final SRAFileReader reader = new SRAFileReader(acc); - return (SRAIndex) reader.getIndex(); - } -} diff --git a/src/test/java/htsjdk/samtools/sra/SRALazyRecordTest.java b/src/test/java/htsjdk/samtools/sra/SRALazyRecordTest.java deleted file mode 100644 index a5d87ffc7e..0000000000 --- a/src/test/java/htsjdk/samtools/sra/SRALazyRecordTest.java +++ /dev/null @@ -1,49 +0,0 @@ -package htsjdk.samtools.sra; - -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SRAFileReader; -import htsjdk.samtools.util.TestUtil; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -/** - * Tests for SRA extension of SAMRecord objects which load fields on demand - */ -public class SRALazyRecordTest extends AbstractSRATest { - private static final SRAAccession DEFAULT_ACCESSION = new SRAAccession("SRR2096940"); - - @DataProvider(name = "serializationTestData") - private Object[][] getSerializationTestData() { - return new Object[][] {{DEFAULT_ACCESSION}}; - } - - @Test(dataProvider = "serializationTestData") - public void testSerialization(final SRAAccession accession) throws Exception { - final SRAFileReader reader = new SRAFileReader(accession); - final SAMRecord initialSAMRecord = reader.getIterator().next(); - reader.close(); - - final SAMRecord deserializedSAMRecord = TestUtil.serializeAndDeserialize(initialSAMRecord); - - Assert.assertEquals( - deserializedSAMRecord, initialSAMRecord, "Deserialized SAMRecord not equal to original SAMRecord"); - } - - @Test(dataProvider = "serializationTestData") - public void testCloneAndEquals(final SRAAccession accession) throws Exception { - final SRAFileReader reader = new SRAFileReader(accession); - final SAMRecord record = reader.getIterator().next(); - reader.close(); - - final SAMRecord newRecord = (SAMRecord) record.clone(); - Assert.assertFalse(record == newRecord); - Assert.assertNotSame(record, newRecord); - Assert.assertEquals(record, newRecord); - Assert.assertEquals(newRecord, record); - - newRecord.setAlignmentStart(record.getAlignmentStart() + 100); - Assert.assertFalse(record.equals(newRecord)); - Assert.assertFalse(newRecord.equals(record)); - } -} diff --git a/src/test/java/htsjdk/samtools/sra/SRAQueryTest.java b/src/test/java/htsjdk/samtools/sra/SRAQueryTest.java deleted file mode 100644 index 49330abd30..0000000000 --- a/src/test/java/htsjdk/samtools/sra/SRAQueryTest.java +++ /dev/null @@ -1,76 +0,0 @@ -package htsjdk.samtools.sra; - -import htsjdk.samtools.SAMRecordIterator; -import htsjdk.samtools.SamInputResource; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SamReaderFactory; -import htsjdk.samtools.ValidationStringency; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -public class SRAQueryTest extends AbstractSRATest { - - @DataProvider(name = "testUnmappedCounts") - private Object[][] createDataForUnmappedCounts() { - return new Object[][] {{"SRR2096940", 498}}; - } - - @Test(dataProvider = "testUnmappedCounts") - public void testUnmappedCounts(String acc, int expectedNumUnmapped) { - SamReader reader = SamReaderFactory.make() - .validationStringency(ValidationStringency.SILENT) - .open(SamInputResource.of(new SRAAccession(acc))); - - final SAMRecordIterator samRecordIterator = reader.queryUnmapped(); - - assertCorrectCountsOfMappedAndUnmappedRecords(samRecordIterator, 0, expectedNumUnmapped); - } - - @DataProvider(name = "testReferenceAlignedCounts") - private Object[][] createDataForReferenceAlignedCounts() { - return new Object[][] { - {"SRR2096940", "CM000681.1", 0, 10591}, - {"SRR2096940", "CM000681.1", 55627015, 10591}, - {"SRR2096940", "CM000681.1", 55627016, 0}, - }; - } - - @Test(dataProvider = "testReferenceAlignedCounts") - public void testReferenceAlignedCounts(String acc, String reference, int referenceStart, int expectedNumMapped) { - SamReader reader = SamReaderFactory.make() - .validationStringency(ValidationStringency.SILENT) - .open(SamInputResource.of(new SRAAccession(acc))); - - final SAMRecordIterator samRecordIterator = reader.queryAlignmentStart(reference, referenceStart); - - assertCorrectCountsOfMappedAndUnmappedRecords(samRecordIterator, expectedNumMapped, 0); - } - - @DataProvider(name = "testQueryCounts") - private Object[][] createDataForQueryCounts() { - return new Object[][] { - {"SRR2096940", "CM000681.1", 0, 59128983, true, 10591, 0}, - {"SRR2096940", "CM000681.1", 55627015, 59128983, true, 10591, 0}, - {"SRR2096940", "CM000681.1", 55627016, 59128983, true, 0, 0}, - {"SRR2096940", "CM000681.1", 55627016, 59128983, false, 10591, -1}, - }; - } - - @Test(dataProvider = "testQueryCounts") - public void testQueryCounts( - String acc, - String reference, - int referenceStart, - int referenceEnd, - boolean contained, - int expectedNumMapped, - int expectedNumUnmapped) { - SamReader reader = SamReaderFactory.make() - .validationStringency(ValidationStringency.SILENT) - .open(SamInputResource.of(new SRAAccession(acc))); - - final SAMRecordIterator samRecordIterator = reader.query(reference, referenceStart, referenceEnd, contained); - - assertCorrectCountsOfMappedAndUnmappedRecords(samRecordIterator, expectedNumMapped, expectedNumUnmapped); - } -} diff --git a/src/test/java/htsjdk/samtools/sra/SRAReferenceTest.java b/src/test/java/htsjdk/samtools/sra/SRAReferenceTest.java deleted file mode 100644 index b0332510b9..0000000000 --- a/src/test/java/htsjdk/samtools/sra/SRAReferenceTest.java +++ /dev/null @@ -1,99 +0,0 @@ -package htsjdk.samtools.sra; - -import htsjdk.samtools.reference.ReferenceSequence; -import htsjdk.samtools.reference.ReferenceSequenceFile; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -public class SRAReferenceTest extends AbstractSRATest { - @DataProvider(name = "testReference") - private Object[][] createDataForReference() { - return new Object[][] { - {"SRR2096940", "CM000681.1", 95001, 95050, "AGATGATTCAGTCTCACCAAGAACACTGAAAGTCACATGGCTACCAGCAT"}, - }; - } - - @Test(dataProvider = "testReference") - public void testReference(String acc, String refContig, int refStart, int refStop, String refBases) { - final ReferenceSequenceFile refSeqFile = new SRAIndexedSequenceFile(new SRAAccession(acc)); - final ReferenceSequence refSeq = refSeqFile.getSubsequenceAt(refContig, refStart, refStop); - Assert.assertEquals(new String(refSeq.getBases()), refBases); - } - - class TestReferenceMtData { - String refContig; - int refStart; - int refStop; - String refBases; - - TestReferenceMtData(String refContig, int refStart, int refStop, String refBases) { - this.refContig = refContig; - this.refStart = refStart; - this.refStop = refStop; - this.refBases = refBases; - } - - @Override - public String toString() { - return refContig + ":" + refStart + "-" + refStop + " = " + refBases; - } - } - - @DataProvider(name = "testReferenceMt") - private Object[][] createDataForReferenceMt() { - return new Object[][] { - { - "SRR353866", - Arrays.asList( - new TestReferenceMtData( - "AAAB01001871.1", 1, 50, "TGACGCGCATGAATGGATTAACGAGATTCCCTCTGTCCCTATCTACTATC"), - new TestReferenceMtData( - "AAAB01001871.1", 901, 950, "ACCAAGCGTACGATTGTTCACCCTTTCAAGGGAACGTGAGCTGGGTTTAG"), - new TestReferenceMtData( - "AAAB01008987.1", 1, 50, "TTTTGGACGATGTTTTTGGTGAACAGAAAACGAGCTCAATCATCCAGAGC"), - new TestReferenceMtData( - "AAAB01008859.1", 1, 50, "CAAAACGATGCCACAGATCAGAAGTTAATTAACGCACATTCTCCACCCAC")) - }, - }; - } - - @Test(dataProvider = "testReferenceMt") - public void testReferenceMt(String acc, List parallelTests) throws Exception { - final ReferenceSequenceFile refSeqFile = new SRAIndexedSequenceFile(new SRAAccession(acc)); - final long timeout = 1000L * 5; // just in case - final List threads = new ArrayList(parallelTests.size()); - final Map runErrors = - Collections.synchronizedMap(new HashMap()); - for (final TestReferenceMtData testData : parallelTests) { - threads.add(new Thread() { - @Override - public void run() { - try { - final ReferenceSequence refSeq = - refSeqFile.getSubsequenceAt(testData.refContig, testData.refStart, testData.refStop); - Assert.assertEquals(new String(refSeq.getBases()), testData.refBases); - } catch (final Exception e) { - Assert.assertNull(runErrors.put(testData, e)); - } - } - }); - } - for (final Thread thread : threads) { - thread.start(); - } - for (final Thread thread : threads) { - thread.join(timeout); - } - for (final Map.Entry result : runErrors.entrySet()) { - // Will fail only on the first, but a debugger will be able to see all the results. - Assert.fail("failed: " + result.getKey(), result.getValue()); - } - } -} diff --git a/src/test/java/htsjdk/samtools/sra/SRATest.java b/src/test/java/htsjdk/samtools/sra/SRATest.java deleted file mode 100644 index ea68ca9f24..0000000000 --- a/src/test/java/htsjdk/samtools/sra/SRATest.java +++ /dev/null @@ -1,610 +0,0 @@ -/*=========================================================================== - * - * PUBLIC DOMAIN NOTICE - * National Center for Biotechnology Information - * - * This software/database is a "United States Government Work" under the - * terms of the United States Copyright Act. It was written as part of - * the author's official duties as a United States Government employee and - * thus cannot be copyrighted. This software/database is freely available - * to the public for use. The National Library of Medicine and the U.S. - * Government have not placed any restriction on its use or reproduction. - * - * Although all reasonable efforts have been taken to ensure the accuracy - * and reliability of the software and data, the NLM and the U.S. - * Government do not and cannot warrant the performance or results that - * may be obtained by using this software or data. The NLM and the U.S. - * Government disclaim all warranties, express or implied, including - * warranties of performance, merchantability or fitness for any particular - * purpose. - * - * Please cite the author in any work or product based on this material. - * - * =========================================================================== - * - */ - -package htsjdk.samtools.sra; - -import htsjdk.samtools.BAMFileSpan; -import htsjdk.samtools.BrowseableBAMIndex; -import htsjdk.samtools.Chunk; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMReadGroupRecord; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMRecordIterator; -import htsjdk.samtools.SAMSequenceRecord; -import htsjdk.samtools.SAMUtils; -import htsjdk.samtools.SAMValidationError; -import htsjdk.samtools.SamInputResource; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SamReaderFactory; -import htsjdk.samtools.ValidationStringency; -import java.util.Arrays; -import java.util.List; -import java.util.Set; -import java.util.TreeSet; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -/** - * Integration tests for SRA functionality - * - * Created by andrii.nikitiuk on 8/24/15. - */ -public class SRATest extends AbstractSRATest { - - @DataProvider(name = "testCounts") - private Object[][] createDataForCounts() { - return new Object[][] { - {"SRR2096940", 10591, 498}, - {"SRR000123", 0, 4583} - }; - } - - @Test(dataProvider = "testCounts") - public void testCounts(String acc, int expectedNumMapped, int expectedNumUnmapped) { - SamReader reader = SamReaderFactory.make() - .validationStringency(ValidationStringency.SILENT) - .open(SamInputResource.of(new SRAAccession(acc))); - - final SAMRecordIterator samRecordIterator = reader.iterator(); - - assertCorrectCountsOfMappedAndUnmappedRecords(samRecordIterator, expectedNumMapped, expectedNumUnmapped); - } - - @DataProvider(name = "testCountsBySpan") - private Object[][] createDataForCountsBySpan() { - return new Object[][] { - {"SRR2096940", Arrays.asList(new Chunk(0, 59128983), new Chunk(59128983, 59141089)), 10591, 498}, - {"SRR2096940", Arrays.asList(new Chunk(0, 29128983), new Chunk(29128983, 59141089)), 10591, 498}, - {"SRR2096940", Arrays.asList(new Chunk(0, 59134983), new Chunk(59134983, 59141089)), 10591, 498}, - {"SRR2096940", Arrays.asList(new Chunk(0, 59130000)), 10591, 0}, - {"SRR2096940", Arrays.asList(new Chunk(0, 59140889)), 10591, 298} - }; - } - - @Test(dataProvider = "testCountsBySpan") - public void testCountsBySpan(String acc, List chunks, int expectedNumMapped, int expectedNumUnmapped) { - SamReader reader = SamReaderFactory.make() - .validationStringency(ValidationStringency.SILENT) - .open(SamInputResource.of(new SRAAccession(acc))); - - final SAMRecordIterator samRecordIterator = ((SamReader.Indexing) reader).iterator(new BAMFileSpan(chunks)); - - assertCorrectCountsOfMappedAndUnmappedRecords(samRecordIterator, expectedNumMapped, expectedNumUnmapped); - } - - @DataProvider(name = "testGroups") - private Object[][] createDataForGroups() { - return new Object[][] { - {"SRR1035115", new TreeSet<>(Arrays.asList("15656144_B09YG", "15656144_B09MR"))}, - {"SRR2096940", new TreeSet<>(Arrays.asList("SRR2096940"))} - }; - } - - @Test(dataProvider = "testGroups") - public void testGroups(String acc, Set groups) { - SamReader reader = SamReaderFactory.make() - .validationStringency(ValidationStringency.SILENT) - .open(SamInputResource.of(new SRAAccession(acc))); - - final SAMRecordIterator samRecordIterator = reader.iterator(); - - SAMFileHeader header = reader.getFileHeader(); - Set headerGroups = new TreeSet<>(); - for (SAMReadGroupRecord group : header.getReadGroups()) { - Assert.assertEquals(group.getReadGroupId(), group.getId()); - headerGroups.add(group.getReadGroupId()); - } - - Assert.assertEquals(groups, headerGroups); - - Set foundGroups = new TreeSet<>(); - - for (int i = 0; i < 10000; i++) { - if (!samRecordIterator.hasNext()) { - break; - } - SAMRecord record = samRecordIterator.next(); - String groupName = (String) record.getAttribute("RG"); - - foundGroups.add(groupName); - } - - // please note that some groups may be introduced after 10k records, which is not an error - Assert.assertEquals(groups, foundGroups); - } - - @DataProvider(name = "testReferences") - private Object[][] createDataForReferences() { - return new Object[][] { - // primary alignment only - { - "SRR353866", - 9, - Arrays.asList( - "AAAB01001871.1", - "AAAB01002233.1", - "AAAB01004056.1", - "AAAB01006027.1", - "AAAB01008846.1", - "AAAB01008859.1", - "AAAB01008960.1", - "AAAB01008982.1", - "AAAB01008987.1"), - Arrays.asList(1115, 1034, 1301, 1007, 11308833, 12516315, 23099915, 1015562, 16222597) - }, - }; - } - - @Test(dataProvider = "testReferences") - public void testReferences( - String acc, int numberFirstReferenceFound, List references, List refLengths) { - SamReader reader = SamReaderFactory.make() - .validationStringency(ValidationStringency.SILENT) - .open(SamInputResource.of(new SRAAccession(acc))); - - final SAMRecordIterator samRecordIterator = reader.iterator(); - - SAMFileHeader header = reader.getFileHeader(); - Set headerRefNames = new TreeSet<>(); - - for (SAMSequenceRecord ref : header.getSequenceDictionary().getSequences()) { - String refName = ref.getSequenceName(); - - int refIndex = references.indexOf(refName); - Assert.assertTrue(refIndex != -1, "Unexpected reference: " + refName); - - Assert.assertEquals( - refLengths.get(refIndex), (Integer) ref.getSequenceLength(), "Reference length is incorrect"); - - headerRefNames.add(refName); - } - - Assert.assertEquals(new TreeSet<>(references), headerRefNames); - - Set foundRefNames = new TreeSet<>(); - for (int i = 0; i < 10000; i++) { - if (!samRecordIterator.hasNext()) { - break; - } - SAMRecord record = samRecordIterator.next(); - - if (record.getReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX)) { - continue; - } - - String refName = record.getReferenceName(); - Assert.assertNotNull(refName); - - foundRefNames.add(refName); - } - - Assert.assertEquals(new TreeSet<>(references.subList(0, numberFirstReferenceFound)), foundRefNames); - } - - @DataProvider(name = "testRows") - private Object[][] createDataForRowsTest() { - return new Object[][] { - // primary alignment only - { - "SRR2127895", - 1, - 83, - "SRR2127895.R.1", - "CGTGCGCGTGACCCATCAGATGCTGTTCAATCAGTGGCAAATGCGGAACGGTTTCTGCGGGTTGCCGATATTCTGGAGAGTAATGCCAGGCAGGGGCAGGT", - "DDBDDDDDBCABC@CCDDDC?99CCA:CDCDDDDDDDECDDDFFFHHHEGIJIIGIJIHIGJIJJJJJJJIIJIIHIGJIJJJIJJIHFFBHHFFFDFBBB", - 366, - "29S72M", - "gi|152968582|ref|NC_009648.1|", - 147, - true, - false, - false - }, - - // small SRA archive - { - "SRR2096940", - 1, - 16, - "SRR2096940.R.3", - "GTGTGTCACCAGATAAGGAATCTGCCTAACAGGAGGTGTGGGTTAGACCCAATATCAGGAGACCAGGAAGGAGGAGGCCTAAGGATGGGGCTTTTCTGTCACCAATCCTGTCCCTAGTGGCCCCACTGTGGGGTGGAGGGGACAGATAAAAGTACCCAGAACCAGAG", - "AAAABFFFFFFFGGGGGGGGIIIIIIIIIIIIIIIIIIIIIIIIIIIIII7IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGGGGGFGFFDFFFFFC", - 55627016, - "167M", - "CM000681.1", - 42, - false, - false, - false - }, - { - "SRR2096940", - 10591, - 4, - "SRR2096940.R.10592", - "CTCTGGTTCTGGGTACTTTTATCTGTCCCCTCCACCCCACAGTGGCGAGCCAGATTCCTTATCTGGTGACACAC", - "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", - -1, - null, - null, - -1, - false, - false, - false - }, - - // primary and secondary alignments - { - "SRR833251", - 81, - 393, - "SRR833251.R.51", - "ATGCAAATCCGAATGGGCTATTTGTGGGTACTTGGGCAGGTAAGTAGCTGGCAATCTTGGTCGGTAAACCAATACCCAAGTTCACATAGGCACCATCGGGA", - "CCCFFFFFHHHHHIJJJIJJJJJIIJJJGIJIJIIJIJJJDGIGIIJIJIHIJJJJJJGIGHIHEDFFFFDDEEEDDDDDCDEEDDDDDDDDDDDDDBBDB", - 1787186, - "38M63S", - "gi|169794206|ref|NC_010410.1|", - 11, - true, - true, - true - }, - - // local SRA file - { - "src/test/resources/htsjdk/samtools/sra/test_archive.sra", - 1, - 99, - "test_archive.R.2", - "TGTCGATGCTGAAAGTGTCTGCGGTGAACCACTTCATGCACAGCGCACACTGCAGCTCCACTTCACCCAGCTGACGGCCGTTCTCATCGTCTCCAGAGCCCGTCTGAGCGTCCGCTGCTTCAGAACTGTCCCCGGCTGTATCCTGAAGAC", - "BBAABBBFAFFFGGGGGGGGGGGGEEFHHHHGHHHHHFHHGHFDGGGGGHHGHHHHHHHHHHHHFHHHGHHHHHHGGGGGGGHGGHHHHHHHHHGHHHHHGGGGHGHHHGGGGGGGGGHHHHEHHHHHHHHHHGCGGGHHHHHHGBFFGF", - 2811570, - "150M", - "NC_007121.5", - 60, - true, - false, - false - } - }; - } - - @Test(dataProvider = "testRows") - public void testRows( - String acc, - int recordIndex, - int flags, - String readName, - String bases, - String quals, - int refStart, - String cigar, - String refName, - int mapQ, - boolean hasMate, - boolean isSecondOfPair, - boolean isSecondaryAlignment) { - SAMRecord record = getRecordByIndex(acc, recordIndex, false); - - checkSAMRecord( - record, - flags, - readName, - bases, - quals, - refStart, - cigar, - refName, - mapQ, - hasMate, - isSecondOfPair, - isSecondaryAlignment); - } - - @Test(dataProvider = "testRows") - public void testRowsAfterIteratorDetach( - String acc, - int recordIndex, - int flags, - String readName, - String bases, - String quals, - int refStart, - String cigar, - String refName, - int mapQ, - boolean hasMate, - boolean isSecondOfPair, - boolean isSecondaryAlignment) { - SAMRecord record = getRecordByIndex(acc, recordIndex, true); - - checkSAMRecord( - record, - flags, - readName, - bases, - quals, - refStart, - cigar, - refName, - mapQ, - hasMate, - isSecondOfPair, - isSecondaryAlignment); - } - - @Test(dataProvider = "testRows") - public void testRowsOverrideValues( - String acc, - int recordIndex, - int flags, - String readName, - String bases, - String quals, - int refStart, - String cigar, - String refName, - int mapQ, - boolean hasMate, - boolean isSecondOfPair, - boolean isSecondaryAlignment) { - SAMRecord record = getRecordByIndex(acc, recordIndex, true); - SAMFileHeader header = record.getHeader(); - - record.setFlags(0); - record.setReadUnmappedFlag(refStart == -1); - record.setReadBases("C".getBytes()); - record.setBaseQualities(SAMUtils.fastqToPhred("A")); - if (refStart == -1) { - checkSAMRecord(record, 4, readName, "C", "A", refStart, "1M", refName, mapQ, false, false, false); - } else { - int sequenceIndex = header.getSequenceIndex(refName); - Assert.assertFalse(sequenceIndex == -1); - - if (sequenceIndex == 0) { - if (header.getSequenceDictionary().getSequences().size() > 1) { - sequenceIndex++; - } - } else { - sequenceIndex--; - } - - refName = header.getSequence(sequenceIndex).getSequenceName(); - - record.setAlignmentStart(refStart - 100); - record.setCigarString("1M"); - record.setMappingQuality(mapQ - 1); - record.setReferenceIndex(sequenceIndex); - - checkSAMRecord(record, 0, readName, "C", "A", refStart - 100, "1M", refName, mapQ - 1, false, false, false); - } - } - - @Test(dataProvider = "testRows") - public void testRowsBySpan( - String acc, - int recordIndex, - int flags, - String readName, - String bases, - String quals, - int refStart, - String cigar, - String refName, - int mapQ, - boolean hasMate, - boolean isSecondOfPair, - boolean isSecondaryAlignment) { - SamReader reader = SamReaderFactory.make() - .validationStringency(ValidationStringency.SILENT) - .open(SamInputResource.of(new SRAAccession(acc))); - - SAMFileHeader header = reader.getFileHeader(); - - Chunk chunk; - if (refStart != -1) { - long refOffset = 0; - int refIndex = header.getSequenceDictionary().getSequence(refName).getSequenceIndex(); - for (SAMSequenceRecord sequenceRecord : - header.getSequenceDictionary().getSequences()) { - if (sequenceRecord.getSequenceIndex() < refIndex) { - refOffset += sequenceRecord.getSequenceLength(); - } - } - - chunk = new Chunk(refOffset + refStart - 1, refOffset + refStart); - } else { - long totalRefLength = header.getSequenceDictionary().getReferenceLength(); - long totalRecordRange = - ((BAMFileSpan) reader.indexing().getFilePointerSpanningReads()).toCoordinateArray()[1]; - chunk = new Chunk(totalRefLength, totalRecordRange); - } - - final SAMRecordIterator samRecordIterator = ((SamReader.Indexing) reader).iterator(new BAMFileSpan(chunk)); - - SAMRecord record = null; - while (samRecordIterator.hasNext()) { - SAMRecord currentRecord = samRecordIterator.next(); - if (currentRecord.getReadName().equals(readName)) { - record = currentRecord; - break; - } - } - - checkSAMRecord( - record, - flags, - readName, - bases, - quals, - refStart, - cigar, - refName, - mapQ, - hasMate, - isSecondOfPair, - isSecondaryAlignment); - } - - @Test(dataProvider = "testRows") - public void testRowsByIndex( - String acc, - int recordIndex, - int flags, - String readName, - String bases, - String quals, - int refStart, - String cigar, - String refName, - int mapQ, - boolean hasMate, - boolean isSecondOfPair, - boolean isSecondaryAlignment) { - SamReader reader = SamReaderFactory.make() - .validationStringency(ValidationStringency.SILENT) - .open(SamInputResource.of(new SRAAccession(acc))); - - Assert.assertTrue(reader.hasIndex()); - Assert.assertTrue(reader.indexing().hasBrowseableIndex()); - - SAMFileHeader header = reader.getFileHeader(); - BrowseableBAMIndex index = reader.indexing().getBrowseableIndex(); - - BAMFileSpan span; - if (refStart != -1) { - int refIndex = header.getSequenceDictionary().getSequence(refName).getSequenceIndex(); - span = index.getSpanOverlapping(refIndex, refStart, refStart + 1); - } else { - long chunkStart = index.getStartOfLastLinearBin(); - long totalRecordRange = - ((BAMFileSpan) reader.indexing().getFilePointerSpanningReads()).toCoordinateArray()[1]; - span = new BAMFileSpan(new Chunk(chunkStart, totalRecordRange)); - } - - final SAMRecordIterator samRecordIterator = ((SamReader.Indexing) reader).iterator(span); - - SAMRecord record = null; - while (samRecordIterator.hasNext()) { - SAMRecord currentRecord = samRecordIterator.next(); - if (refStart != -1 && currentRecord.getAlignmentStart() + currentRecord.getReadLength() < refStart) { - continue; - } - - if (currentRecord.getReadName().equals(readName) - && currentRecord.isSecondaryAlignment() == isSecondaryAlignment - && (!hasMate || currentRecord.getSecondOfPairFlag() == isSecondOfPair)) { - record = currentRecord; - break; - } - } - - checkSAMRecord( - record, - flags, - readName, - bases, - quals, - refStart, - cigar, - refName, - mapQ, - hasMate, - isSecondOfPair, - isSecondaryAlignment); - } - - private SAMRecord getRecordByIndex(String acc, int recordIndex, boolean detach) { - SamReader reader = SamReaderFactory.make() - .validationStringency(ValidationStringency.SILENT) - .open(SamInputResource.of(new SRAAccession(acc))); - - final SAMRecordIterator samRecordIterator = reader.iterator(); - - while (recordIndex != 0) { - Assert.assertTrue(samRecordIterator.hasNext(), "Record set is too small"); - - samRecordIterator.next(); - recordIndex--; - } - Assert.assertTrue(samRecordIterator.hasNext(), "Record set is too small"); - - SAMRecord record = samRecordIterator.next(); - - if (detach) { - samRecordIterator.next(); - } - - return record; - } - - private void checkSAMRecord( - SAMRecord record, - int flags, - String readName, - String bases, - String quals, - int refStart, - String cigar, - String refName, - int mapQ, - boolean hasMate, - boolean isSecondOfPair, - boolean isSecondaryAlignment) { - - Assert.assertNotNull(record, "Record with read id: " + readName + " was not found by span created from index"); - - List validationErrors = record.isValid(); - Assert.assertNull( - validationErrors, - "SRA Lazy record is invalid. List of errors: " - + (validationErrors != null ? validationErrors.toString() : "")); - - Assert.assertEquals(record.getReadName(), readName); - Assert.assertEquals(new String(record.getReadBases()), bases); - Assert.assertEquals(record.getBaseQualityString(), quals); - Assert.assertEquals(record.getReadPairedFlag(), hasMate); - Assert.assertEquals(record.getFlags(), flags); - Assert.assertEquals(record.isSecondaryAlignment(), isSecondaryAlignment); - if (hasMate) { - Assert.assertEquals(record.getSecondOfPairFlag(), isSecondOfPair); - } - if (refStart == -1) { - Assert.assertEquals(record.getReadUnmappedFlag(), true); - Assert.assertEquals(record.getAlignmentStart(), 0); - Assert.assertEquals(record.getCigarString(), "*"); - Assert.assertEquals(record.getReferenceName(), "*"); - Assert.assertEquals(record.getMappingQuality(), 0); - } else { - Assert.assertEquals(record.getReadUnmappedFlag(), false); - Assert.assertEquals(record.getAlignmentStart(), refStart); - Assert.assertEquals(record.getCigarString(), cigar); - Assert.assertEquals(record.getReferenceName(), refName); - Assert.assertEquals(record.getMappingQuality(), mapQ); - } - } -} diff --git a/src/test/resources/htsjdk/samtools/sra/test_archive.sra b/src/test/resources/htsjdk/samtools/sra/test_archive.sra deleted file mode 100644 index a9b6e70f30..0000000000 Binary files a/src/test/resources/htsjdk/samtools/sra/test_archive.sra and /dev/null differ